Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 7
-rw-r--r--  llvm/lib/Analysis/MLInlineAdvisor.cpp | 58
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 52
-rw-r--r--  llvm/lib/CAS/CMakeLists.txt | 2
-rw-r--r--  llvm/lib/CAS/OnDiskCommon.cpp | 56
-rw-r--r--  llvm/lib/CAS/OnDiskCommon.h | 22
-rw-r--r--  llvm/lib/CAS/OnDiskDataAllocator.cpp | 6
-rw-r--r--  llvm/lib/CAS/OnDiskGraphDB.cpp | 1755
-rw-r--r--  llvm/lib/CAS/OnDiskKeyValueDB.cpp | 113
-rw-r--r--  llvm/lib/CodeGen/MIRFSDiscriminator.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/MIRSampleProfile.cpp | 4
-rw-r--r--  llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp | 6
-rw-r--r--  llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 16
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/Core.cpp | 1002
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp | 2
-rw-r--r--  llvm/lib/IR/RuntimeLibcalls.cpp | 3
-rw-r--r--  llvm/lib/MC/MCParser/MasmParser.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 144
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 16
-rw-r--r--  llvm/lib/Target/AMDGPU/SIDefines.h | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 13
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 140
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 63
-rw-r--r--  llvm/lib/Target/BPF/BTFDebug.cpp | 112
-rw-r--r--  llvm/lib/Target/Hexagon/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/Target/Hexagon/Hexagon.h | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenInsert.cpp | 8
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp | 334
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td | 24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 38
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 11
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 14
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 14
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 14
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 21
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 6
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 15
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.h | 3
-rw-r--r--  llvm/lib/Target/X86/X86.td | 1
-rw-r--r--  llvm/lib/TargetParser/ARMTargetParser.cpp | 8
-rw-r--r--  llvm/lib/TargetParser/X86TargetParser.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNSink.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7
-rw-r--r--  llvm/lib/WindowsDriver/MSVCPaths.cpp | 75
52 files changed, 3092 insertions, 1137 deletions
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 20a8e1c..dc813f6 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5440,9 +5440,10 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
// ptrtoint (ptradd (Ptr, X - ptrtoint(Ptr))) -> X
Value *Ptr, *X;
- if (CastOpc == Instruction::PtrToInt &&
- match(Op, m_PtrAdd(m_Value(Ptr),
- m_Sub(m_Value(X), m_PtrToInt(m_Deferred(Ptr))))) &&
+ if ((CastOpc == Instruction::PtrToInt || CastOpc == Instruction::PtrToAddr) &&
+ match(Op,
+ m_PtrAdd(m_Value(Ptr),
+ m_Sub(m_Value(X), m_PtrToIntOrAddr(m_Deferred(Ptr))))) &&
X->getType() == Ty && Ty == Q.DL.getIndexType(Ptr->getType()))
return X;
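
The algebra behind this fold generalizes from ptrtoint to ptrtoaddr: adding X - ptrtoint(Ptr) back onto Ptr produces an address whose integer value is exactly X, so the outer cast collapses to X. A minimal standalone sketch of that identity, using plain unsigned arithmetic in place of LLVM's pointer operations (variable names are illustrative, not from the patch):

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t P = 0x1000; // stands in for ptrtoint(Ptr)
  uintptr_t X = 0x2A;   // stands in for the index-typed value X
  // ptradd(Ptr, X - ptrtoint(Ptr)) has address P + (X - P), which is X;
  // unsigned wraparound makes the identity hold for every P and X.
  assert(P + (X - P) == X);
  return 0;
}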
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 1d1a5560..9a5ae2a 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -324,32 +324,44 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
FAM.invalidate(*Caller, PA);
}
Advice.updateCachedCallerFPI(FAM);
- int64_t IRSizeAfter =
- getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
- CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
+ if (Caller == Callee) {
+ assert(!CalleeWasDeleted);
+ // We double-counted CallerAndCalleeEdges, since the caller and the
+ // callee are the same function.
+ assert(Advice.CallerAndCalleeEdges % 2 == 0);
+ CurrentIRSize += getIRSize(*Caller) - Advice.CallerIRSize;
+ EdgeCount += getCachedFPI(*Caller).DirectCallsToDefinedFunctions -
+ Advice.CallerAndCalleeEdges / 2;
+ // The NodeCount would stay the same.
+ } else {
+ int64_t IRSizeAfter =
+ getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
+ CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
+
+ // We can delta-update module-wide features. We know the inlining only
+ // changed the caller, and maybe the callee (by deleting the latter). Nodes
+ // are simple to update. For edges, we 'forget' the edges that the caller
+ // and callee used to have before inlining, and add back what they currently
+ // have together.
+ int64_t NewCallerAndCalleeEdges =
+ getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
+
+ // A dead function's node is not actually removed from the call graph until
+ // the end of the call graph walk, but the node no longer belongs to any
+ // valid SCC.
+ if (CalleeWasDeleted) {
+ --NodeCount;
+ NodesInLastSCC.erase(CG.lookup(*Callee));
+ DeadFunctions.insert(Callee);
+ } else {
+ NewCallerAndCalleeEdges +=
+ getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
+ }
+ EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
+ }
if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
ForceStop = true;
- // We can delta-update module-wide features. We know the inlining only changed
- // the caller, and maybe the callee (by deleting the latter).
- // Nodes are simple to update.
- // For edges, we 'forget' the edges that the caller and callee used to have
- // before inlining, and add back what they currently have together.
- int64_t NewCallerAndCalleeEdges =
- getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
-
- // A dead function's node is not actually removed from the call graph until
- // the end of the call graph walk, but the node no longer belongs to any valid
- // SCC.
- if (CalleeWasDeleted) {
- --NodeCount;
- NodesInLastSCC.erase(CG.lookup(*Callee));
- DeadFunctions.insert(Callee);
- } else {
- NewCallerAndCalleeEdges +=
- getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
- }
- EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}
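
The self-inlining branch above halves the cached edge sum because, when Caller == Callee, the same function's direct-call edges were added once as caller edges and once as callee edges. A hedged sketch of that bookkeeping, with illustrative names rather than the advisor's real fields:

#include <cassert>
#include <cstdint>

int64_t deltaUpdateEdges(int64_t EdgeCount, int64_t CachedCallerPlusCallee,
                         int64_t EdgesAfterInlining, bool CallerIsCallee) {
  if (CallerIsCallee) {
    // The cached sum counted the one function twice, so halve it.
    assert(CachedCallerPlusCallee % 2 == 0);
    return EdgeCount + EdgesAfterInlining - CachedCallerPlusCallee / 2;
  }
  // Distinct caller/callee: forget both old counts, add the new total back.
  return EdgeCount + EdgesAfterInlining - CachedCallerPlusCallee;
}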
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index c4eb838..6f7dd79 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15474,11 +15474,26 @@ void ScalarEvolution::LoopGuards::collectFromPHI(
}
// Return a new SCEV that modifies \p Expr to the closest number divisible by
+// \p Divisor that is less than or equal to Expr. For now, only handle
+// constant Expr.
+static const SCEV *getPreviousSCEVDivisibleByDivisor(const SCEV *Expr,
+ const APInt &DivisorVal,
+ ScalarEvolution &SE) {
+ const APInt *ExprVal;
+ if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
+ DivisorVal.isNonPositive())
+ return Expr;
+ APInt Rem = ExprVal->urem(DivisorVal);
+ // return the SCEV: Expr - Expr % Divisor
+ return SE.getConstant(*ExprVal - Rem);
+}
+
+// Return a new SCEV that modifies \p Expr to the closest number divisible by
+// \p Divisor that is greater than or equal to Expr. For now, only handle
+// constant Expr.
-static const SCEV *getNextSCEVDividesByDivisor(const SCEV *Expr,
- const APInt &DivisorVal,
- ScalarEvolution &SE) {
+static const SCEV *getNextSCEVDivisibleByDivisor(const SCEV *Expr,
+ const APInt &DivisorVal,
+ ScalarEvolution &SE) {
const APInt *ExprVal;
if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
DivisorVal.isNonPositive())
@@ -15557,20 +15572,6 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
match(LHS, m_scev_APInt(C)) && C->isNonNegative();
};
- // Return a new SCEV that modifies \p Expr to the closest number divides by
- // \p Divisor and less or equal than Expr. For now, only handle constant
- // Expr.
- auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr,
- const APInt &DivisorVal) {
- const APInt *ExprVal;
- if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
- DivisorVal.isNonPositive())
- return Expr;
- APInt Rem = ExprVal->urem(DivisorVal);
- // return the SCEV: Expr - Expr % Divisor
- return SE.getConstant(*ExprVal - Rem);
- };
-
// Apply divisibility by \p Divisor on MinMaxExpr with constant values,
// recursively. This is done by aligning up/down the constant value to the
// Divisor.
@@ -15592,8 +15593,9 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
assert(SE.isKnownNonNegative(MinMaxLHS) &&
"Expected non-negative operand!");
auto *DivisibleExpr =
- IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, DivisorVal)
- : getNextSCEVDividesByDivisor(MinMaxLHS, DivisorVal, SE);
+ IsMin
+ ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE)
+ : getNextSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE);
SmallVector<const SCEV *> Ops = {
ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
return SE.getMinMaxExpr(SCTy, Ops);
@@ -15670,21 +15672,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
[[fallthrough]];
case CmpInst::ICMP_SLT: {
RHS = SE.getMinusSCEV(RHS, One);
- RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy);
+ RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
break;
}
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_SGT:
RHS = SE.getAddExpr(RHS, One);
- RHS = getNextSCEVDividesByDivisor(RHS, DividesBy, SE);
+ RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
break;
case CmpInst::ICMP_ULE:
case CmpInst::ICMP_SLE:
- RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy);
+ RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
break;
case CmpInst::ICMP_UGE:
case CmpInst::ICMP_SGE:
- RHS = getNextSCEVDividesByDivisor(RHS, DividesBy, SE);
+ RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
break;
default:
break;
@@ -15738,7 +15740,7 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
case CmpInst::ICMP_NE:
if (match(RHS, m_scev_Zero())) {
const SCEV *OneAlignedUp =
- getNextSCEVDividesByDivisor(One, DividesBy, SE);
+ getNextSCEVDivisibleByDivisor(One, DividesBy, SE);
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
} else {
// LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
@@ -15965,7 +15967,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
if (LHS > RHS)
std::swap(LHS, RHS);
if (NotEqual.contains({LHS, RHS})) {
- const SCEV *OneAlignedUp = getNextSCEVDividesByDivisor(
+ const SCEV *OneAlignedUp = getNextSCEVDivisibleByDivisor(
SE.getOne(S->getType()), SE.getConstantMultiple(S), SE);
return SE.getUMaxExpr(OneAlignedUp, S);
}
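
Behind the renames, the two helpers are ordinary align-down / align-up operations on non-negative constants. A hedged sketch with plain unsigned integers standing in for APInt:

#include <cassert>
#include <cstdint>

// Largest multiple of Divisor that is <= Value: Value - Value % Divisor.
uint64_t alignDown(uint64_t Value, uint64_t Divisor) {
  return Value - Value % Divisor;
}

// Smallest multiple of Divisor that is >= Value.
uint64_t alignUp(uint64_t Value, uint64_t Divisor) {
  uint64_t Rem = Value % Divisor;
  return Rem ? Value + (Divisor - Rem) : Value;
}

int main() {
  assert(alignDown(17, 5) == 15); // previous multiple, used for <= / < guards
  assert(alignUp(17, 5) == 20);   // next multiple, used for >= / > guards
  return 0;
}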
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index bca39b6..a2f8c49 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -8,6 +8,8 @@ add_llvm_component_library(LLVMCAS
ObjectStore.cpp
OnDiskCommon.cpp
OnDiskDataAllocator.cpp
+ OnDiskGraphDB.cpp
+ OnDiskKeyValueDB.cpp
OnDiskTrieRawHashMap.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/CAS/OnDiskCommon.cpp b/llvm/lib/CAS/OnDiskCommon.cpp
index 25aa06b..281bde9 100644
--- a/llvm/lib/CAS/OnDiskCommon.cpp
+++ b/llvm/lib/CAS/OnDiskCommon.cpp
@@ -7,9 +7,10 @@
//===----------------------------------------------------------------------===//
#include "OnDiskCommon.h"
-#include "llvm/Config/config.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Process.h"
+#include <mutex>
#include <thread>
#if __has_include(<sys/file.h>)
@@ -25,8 +26,44 @@
#include <fcntl.h>
#endif
+#if __has_include(<sys/mount.h>)
+#include <sys/mount.h> // statfs
+#endif
+
using namespace llvm;
+static uint64_t OnDiskCASMaxMappingSize = 0;
+
+Expected<std::optional<uint64_t>> cas::ondisk::getOverriddenMaxMappingSize() {
+ static std::once_flag Flag;
+ Error Err = Error::success();
+ std::call_once(Flag, [&Err] {
+ ErrorAsOutParameter EAO(&Err);
+ constexpr const char *EnvVar = "LLVM_CAS_MAX_MAPPING_SIZE";
+ auto Value = sys::Process::GetEnv(EnvVar);
+ if (!Value)
+ return;
+
+ uint64_t Size;
+ if (StringRef(*Value).getAsInteger(/*auto*/ 0, Size))
+ Err = createStringError(inconvertibleErrorCode(),
+ "invalid value for %s: expected integer", EnvVar);
+ OnDiskCASMaxMappingSize = Size;
+ });
+
+ if (Err)
+ return std::move(Err);
+
+ if (OnDiskCASMaxMappingSize == 0)
+ return std::nullopt;
+
+ return OnDiskCASMaxMappingSize;
+}
+
+void cas::ondisk::setMaxMappingSize(uint64_t Size) {
+ OnDiskCASMaxMappingSize = Size;
+}
+
std::error_code cas::ondisk::lockFileThreadSafe(int FD,
sys::fs::LockKind Kind) {
#if HAVE_FLOCK
@@ -125,3 +162,20 @@ Expected<size_t> cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize,
return NewSize; // Pretend it worked.
#endif
}
+
+bool cas::ondisk::useSmallMappingSize(const Twine &P) {
+ // Add exceptions to use small database file here.
+#if defined(__APPLE__) && __has_include(<sys/mount.h>)
+ // macOS tmpfs does not support sparse tails.
+ SmallString<128> PathStorage;
+ StringRef Path = P.toNullTerminatedStringRef(PathStorage);
+ struct statfs StatFS;
+ if (statfs(Path.data(), &StatFS) != 0)
+ return false;
+
+ if (strcmp(StatFS.f_fstypename, "tmpfs") == 0)
+ return true;
+#endif
+ // Default to use regular database file.
+ return false;
+}
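
A hedged sketch of how a database creator might combine these helpers; the size constants and the surrounding function are illustrative, not part of the patch:

#include "OnDiskCommon.h"
#include "llvm/Support/Error.h"

static uint64_t chooseMappingSize(const llvm::Twine &Path) {
  using namespace llvm::cas::ondisk;
  constexpr uint64_t DefaultSize = 16ULL << 30; // hypothetical 16 GB default
  constexpr uint64_t SmallSize = 2ULL << 30;    // hypothetical tmpfs fallback
  uint64_t Size = useSmallMappingSize(Path) ? SmallSize : DefaultSize;
  if (auto Override = getOverriddenMaxMappingSize()) {
    if (*Override) // explicit override wins over the heuristic
      Size = **Override;
  } else
    llvm::consumeError(Override.takeError());
  return Size;
}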
diff --git a/llvm/lib/CAS/OnDiskCommon.h b/llvm/lib/CAS/OnDiskCommon.h
index 8b79ffe..ac00662 100644
--- a/llvm/lib/CAS/OnDiskCommon.h
+++ b/llvm/lib/CAS/OnDiskCommon.h
@@ -12,9 +12,31 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include <chrono>
+#include <optional>
namespace llvm::cas::ondisk {
+/// The version for all the on-disk database files. It needs to be bumped when
+/// compatibility-breaking changes are introduced.
+constexpr StringLiteral CASFormatVersion = "v1";
+
+/// Retrieves an overridden maximum mapping size for CAS files, if any,
+/// specified by LLVM_CAS_MAX_MAPPING_SIZE in the environment or set by
+/// `setMaxMappingSize()`. If the value from the environment is unreadable,
+/// returns an error.
+Expected<std::optional<uint64_t>> getOverriddenMaxMappingSize();
+
+/// Set MaxMappingSize for on-disk CAS. This function is not thread-safe; it
+/// should be called before creating any on-disk CAS and does not affect CAS
+/// instances already created. Set a value of 0 to use the default size.
+void setMaxMappingSize(uint64_t Size);
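
A short usage sketch of the ordering requirement stated above (the setup function is hypothetical):

// Hypothetical process startup: install the cap before any database exists.
void configureCASLimits() {
  llvm::cas::ondisk::setMaxMappingSize(4ULL << 30); // 4 GB cap
  // Databases created after this point observe the cap; databases opened
  // earlier keep the mapping size they were created with.
}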
+
+/// Whether to use a small file mapping for on-disk databases created in
+/// \p Path.
+///
+/// For file systems that don't support sparse files, use a smaller file
+/// mapping to avoid consuming too much disk space on creation.
+bool useSmallMappingSize(const Twine &Path);
+
/// Thread-safe alternative to \c sys::fs::lockFile. This does not support all
/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library
/// for now.
diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp
index 13bbd66..9c68bc4 100644
--- a/llvm/lib/CAS/OnDiskDataAllocator.cpp
+++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp
@@ -185,7 +185,7 @@ Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset,
return ArrayRef<char>{Impl->File.getRegion().data() + Offset.get(), Size};
}
-MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() {
+MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() const {
return Impl->Store.getUserHeader();
}
@@ -221,7 +221,9 @@ Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset,
"OnDiskDataAllocator is not supported");
}
-MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { return {}; }
+MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() const {
+ return {};
+}
size_t OnDiskDataAllocator::size() const { return 0; }
size_t OnDiskDataAllocator::capacity() const { return 0; }
diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp
new file mode 100644
index 0000000..72bb98c
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskGraphDB.cpp
@@ -0,0 +1,1755 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements OnDiskGraphDB, an on-disk CAS nodes database,
+/// independent of a particular hashing algorithm. It only needs to be
+/// configured for the hash size and controls the schema of the storage.
+///
+/// OnDiskGraphDB defines:
+///
+/// - How the data is stored inside database, either as a standalone file, or
+/// allocated inside a datapool.
+/// - How references to other objects inside the same database are stored. They
+/// are stored as internal references, instead of full hash values, to save
+/// space.
+/// - How to chain databases together and import objects from upstream
+/// databases.
+///
+/// Here's a top-level description of the current layout:
+///
+/// - db/index.<version>: a file for the "index" table, named by \a
+/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B
+/// that are accessed atomically, describing the object kind and where/how
+/// it's stored (including an optional file offset). See \a TrieRecord for
+/// more details.
+/// - db/data.<version>: a file for the "data" table, named by \a
+/// DataPoolTableName and managed by \a DataStore. New objects within
+/// TrieRecord::MaxEmbeddedSize are inserted here as \a
+/// TrieRecord::StorageKind::DataPool.
+/// - db/obj.<offset>.<version>: a file storing an object outside the main
+/// "data" table, named by its offset into the "index" table, with the
+/// format of \a TrieRecord::StorageKind::Standalone.
+/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
+/// main "data" table, named by its offset into the "index" table, with
+/// the format of \a TrieRecord::StorageKind::StandaloneLeaf.
+/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object
+/// outside the main "data" table, named by its offset into the "index" table,
+/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0.
+//
+//===----------------------------------------------------------------------===//
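
A hedged sketch of the file-naming scheme the comment describes, mirroring what getStandalonePath() assembles further down in this file (the helper is illustrative; "v1" matches the patch's CASFormatVersion):

#include <cstdint>
#include <string>

// Illustrative only: "obj." + index offset + "." + version => "obj.123.v1".
static std::string standaloneFileName(const std::string &Prefix,
                                      int64_t IndexOffset,
                                      const std::string &Version = "v1") {
  return Prefix + std::to_string(IndexOffset) + "." + Version;
}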
+
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "OnDiskCommon.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CAS/OnDiskDataAllocator.h"
+#include "llvm/CAS/OnDiskTrieRawHashMap.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include <atomic>
+#include <mutex>
+#include <optional>
+
+#define DEBUG_TYPE "on-disk-cas"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+static constexpr StringLiteral IndexTableName = "llvm.cas.index";
+static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";
+
+static constexpr StringLiteral IndexFilePrefix = "index.";
+static constexpr StringLiteral DataPoolFilePrefix = "data.";
+
+static constexpr StringLiteral FilePrefixObject = "obj.";
+static constexpr StringLiteral FilePrefixLeaf = "leaf.";
+static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0.";
+
+static Error createCorruptObjectError(Expected<ArrayRef<uint8_t>> ID) {
+ if (!ID)
+ return ID.takeError();
+
+ return createStringError(llvm::errc::invalid_argument,
+ "corrupt object '" + toHex(*ID) + "'");
+}
+
+namespace {
+
+/// Trie record data: 8 bytes, atomic<uint64_t>
+/// - 1-byte: StorageKind
+/// - 7-bytes: DataStoreOffset (offset into referenced file)
+class TrieRecord {
+public:
+ enum class StorageKind : uint8_t {
+ /// Unknown object.
+ Unknown = 0,
+
+ /// data.vX: main pool, full DataStore record.
+ DataPool = 1,
+
+ /// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record.
+ Standalone = 10,
+
+ /// leaf.<TrieRecordOffset>.vX: standalone, just the data. The file contents
+ /// are exactly the data and the file size matches the data size. No refs.
+ StandaloneLeaf = 11,
+
+ /// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an
+ /// extra null character ('\0'). File size is 1 bigger than the data size.
+ /// No refs.
+ StandaloneLeaf0 = 12,
+ };
+
+ static StringRef getStandaloneFilePrefix(StorageKind SK) {
+ switch (SK) {
+ default:
+ llvm_unreachable("Expected standalone storage kind");
+ case TrieRecord::StorageKind::Standalone:
+ return FilePrefixObject;
+ case TrieRecord::StorageKind::StandaloneLeaf:
+ return FilePrefixLeaf;
+ case TrieRecord::StorageKind::StandaloneLeaf0:
+ return FilePrefixLeaf0;
+ }
+ }
+
+ enum Limits : int64_t {
+ /// Saves files bigger than 64KB standalone instead of embedding them.
+ MaxEmbeddedSize = 64LL * 1024LL - 1,
+ };
+
+ struct Data {
+ StorageKind SK = StorageKind::Unknown;
+ FileOffset Offset;
+ };
+
+ /// Pack StorageKind and Offset from Data into 8 byte TrieRecord.
+ static uint64_t pack(Data D) {
+ assert(D.Offset.get() < (int64_t)(1ULL << 56));
+ uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get();
+ assert(D.SK != StorageKind::Unknown || Packed == 0);
+#ifndef NDEBUG
+ Data RoundTrip = unpack(Packed);
+ assert(D.SK == RoundTrip.SK);
+ assert(D.Offset.get() == RoundTrip.Offset.get());
+#endif
+ return Packed;
+ }
+
+ // Unpack TrieRecord into Data.
+ static Data unpack(uint64_t Packed) {
+ Data D;
+ if (!Packed)
+ return D;
+ D.SK = (StorageKind)(Packed >> 56);
+ D.Offset = FileOffset(Packed & (UINT64_MAX >> 8));
+ return D;
+ }
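
A worked example of the 1-byte-kind / 7-byte-offset packing, with values chosen purely for illustration:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t SK = 1, Offset = 0x20;      // DataPool record at offset 0x20
  uint64_t Packed = SK << 56 | Offset; // 0x0100000000000020
  assert(Packed >> 56 == SK);                     // kind from the top byte
  assert((Packed & (UINT64_MAX >> 8)) == Offset); // offset from low 7 bytes
  return 0;
}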
+
+ TrieRecord() : Storage(0) {}
+
+ Data load() const { return unpack(Storage); }
+ bool compare_exchange_strong(Data &Existing, Data New);
+
+private:
+ std::atomic<uint64_t> Storage;
+};
+
+/// DataStore record data: 4B + size? + refs? + data + 0
+/// - 4-bytes: Header
+/// - {0,4,8}-bytes: DataSize (may be packed in Header)
+/// - {0,4,8}-bytes: NumRefs (may be packed in Header)
+/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
+/// - <data>
+/// - 1-byte: 0-term
+struct DataRecordHandle {
+ /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
+ /// convenience to avoid computing padding later.
+ enum class NumRefsFlags : uint8_t {
+ Uses0B = 0U,
+ Uses1B = 1U,
+ Uses2B = 2U,
+ Uses4B = 3U,
+ Uses8B = 4U,
+ Max = Uses8B,
+ };
+
+ /// DataSize storage: 8B, 4B, 2B, or 1B.
+ enum class DataSizeFlags {
+ Uses1B = 0U,
+ Uses2B = 1U,
+ Uses4B = 2U,
+ Uses8B = 3U,
+ Max = Uses8B,
+ };
+
+ /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
+ enum class RefKindFlags {
+ InternalRef = 0U,
+ InternalRef4B = 1U,
+ Max = InternalRef4B,
+ };
+
+ enum Counts : int {
+ NumRefsShift = 0,
+ NumRefsBits = 3,
+ DataSizeShift = NumRefsShift + NumRefsBits,
+ DataSizeBits = 2,
+ RefKindShift = DataSizeShift + DataSizeBits,
+ RefKindBits = 1,
+ };
+ static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
+ 0,
+ "Not enough bits");
+ static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
+ 0,
+ "Not enough bits");
+ static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
+ 0,
+ "Not enough bits");
+
+ /// Layout of the DataRecordHandle and how to decode it.
+ struct LayoutFlags {
+ NumRefsFlags NumRefs;
+ DataSizeFlags DataSize;
+ RefKindFlags RefKind;
+
+ static uint64_t pack(LayoutFlags LF) {
+ unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
+ ((unsigned)LF.DataSize << DataSizeShift) |
+ ((unsigned)LF.RefKind << RefKindShift);
+#ifndef NDEBUG
+ LayoutFlags RoundTrip = unpack(Packed);
+ assert(LF.NumRefs == RoundTrip.NumRefs);
+ assert(LF.DataSize == RoundTrip.DataSize);
+ assert(LF.RefKind == RoundTrip.RefKind);
+#endif
+ return Packed;
+ }
+ static LayoutFlags unpack(uint64_t Storage) {
+ assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte");
+ LayoutFlags LF;
+ LF.NumRefs =
+ (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
+ LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
+ ((1U << DataSizeBits) - 1));
+ LF.RefKind =
+ (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
+ return LF;
+ }
+ };
+
+ /// Header layout:
+ /// - 1-byte: LayoutFlags
+ /// - 1-byte: 1B size field
+ /// - {0,2}-bytes: 2B size field
+ struct Header {
+ using PackTy = uint32_t;
+ PackTy Packed;
+
+ static constexpr unsigned LayoutFlagsShift =
+ (sizeof(PackTy) - 1) * CHAR_BIT;
+ };
+
+ struct Input {
+ InternalRefArrayRef Refs;
+ ArrayRef<char> Data;
+ };
+
+ LayoutFlags getLayoutFlags() const {
+ return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
+ }
+
+ uint64_t getDataSize() const;
+ void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const;
+ uint32_t getNumRefs() const;
+ void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const;
+ int64_t getRefsRelOffset() const;
+ int64_t getDataRelOffset() const;
+
+ static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) {
+ return DataRelOffset + DataSize + 1;
+ }
+ uint64_t getTotalSize() const {
+ return getDataRelOffset() + getDataSize() + 1;
+ }
+
+ /// Describe the layout of data stored and how to decode from
+ /// DataRecordHandle.
+ struct Layout {
+ explicit Layout(const Input &I);
+
+ LayoutFlags Flags;
+ uint64_t DataSize = 0;
+ uint32_t NumRefs = 0;
+ int64_t RefsRelOffset = 0;
+ int64_t DataRelOffset = 0;
+ uint64_t getTotalSize() const {
+ return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
+ }
+ };
+
+ InternalRefArrayRef getRefs() const {
+ assert(H && "Expected valid handle");
+ auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset();
+ size_t Size = getNumRefs();
+ if (!Size)
+ return InternalRefArrayRef();
+ if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
+ return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte), Size);
+ return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size);
+ }
+
+ ArrayRef<char> getData() const {
+ assert(H && "Expected valid handle");
+ return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(),
+ getDataSize());
+ }
+
+ static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc,
+ const Input &I);
+ static Expected<DataRecordHandle>
+ createWithError(function_ref<Expected<char *>(size_t Size)> Alloc,
+ const Input &I);
+ static DataRecordHandle construct(char *Mem, const Input &I);
+
+ static DataRecordHandle get(const char *Mem) {
+ return DataRecordHandle(
+ *reinterpret_cast<const DataRecordHandle::Header *>(Mem));
+ }
+ static Expected<DataRecordHandle>
+ getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset);
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+
+ DataRecordHandle() = default;
+ explicit DataRecordHandle(const Header &H) : H(&H) {}
+
+private:
+ static DataRecordHandle constructImpl(char *Mem, const Input &I,
+ const Layout &L);
+ const Header *H = nullptr;
+};
+
+/// Proxy for any on-disk object or raw data.
+struct OnDiskContent {
+ std::optional<DataRecordHandle> Record;
+ std::optional<ArrayRef<char>> Bytes;
+};
+
+/// Data loaded into memory from a standalone file.
+class StandaloneDataInMemory {
+public:
+ OnDiskContent getContent() const;
+
+ StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
+ TrieRecord::StorageKind SK)
+ : Region(std::move(Region)), SK(SK) {
+#ifndef NDEBUG
+ bool IsStandalone = false;
+ switch (SK) {
+ case TrieRecord::StorageKind::Standalone:
+ case TrieRecord::StorageKind::StandaloneLeaf:
+ case TrieRecord::StorageKind::StandaloneLeaf0:
+ IsStandalone = true;
+ break;
+ default:
+ break;
+ }
+ assert(IsStandalone);
+#endif
+ }
+
+private:
+ std::unique_ptr<sys::fs::mapped_file_region> Region;
+ TrieRecord::StorageKind SK;
+};
+
+/// Container to lookup loaded standalone objects.
+template <size_t NumShards> class StandaloneDataMap {
+ static_assert(isPowerOf2_64(NumShards), "Expected power of 2");
+
+public:
+ uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
+ std::unique_ptr<sys::fs::mapped_file_region> Region);
+
+ const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const;
+ bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); }
+
+private:
+ struct Shard {
+ /// Needs to store a std::unique_ptr for a stable address identity.
+ DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
+ mutable std::mutex Mutex;
+ };
+ Shard &getShard(ArrayRef<uint8_t> Hash) {
+ return const_cast<Shard &>(
+ const_cast<const StandaloneDataMap *>(this)->getShard(Hash));
+ }
+ const Shard &getShard(ArrayRef<uint8_t> Hash) const {
+ static_assert(NumShards <= 256, "Expected only 8 bits of shard");
+ return Shards[Hash[0] % NumShards];
+ }
+
+ Shard Shards[NumShards];
+};
+
+using StandaloneDataMapTy = StandaloneDataMap<16>;
+
+/// A vector of internal node references.
+class InternalRefVector {
+public:
+ void push_back(InternalRef Ref) {
+ if (NeedsFull)
+ return FullRefs.push_back(Ref);
+ if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref))
+ return SmallRefs.push_back(*Small);
+ NeedsFull = true;
+ assert(FullRefs.empty());
+ FullRefs.reserve(SmallRefs.size() + 1);
+ for (InternalRef4B Small : SmallRefs)
+ FullRefs.push_back(Small);
+ FullRefs.push_back(Ref);
+ SmallRefs.clear();
+ }
+
+ operator InternalRefArrayRef() const {
+ assert(SmallRefs.empty() || FullRefs.empty());
+ return NeedsFull ? InternalRefArrayRef(FullRefs)
+ : InternalRefArrayRef(SmallRefs);
+ }
+
+private:
+ bool NeedsFull = false;
+ SmallVector<InternalRef4B> SmallRefs;
+ SmallVector<InternalRef> FullRefs;
+};
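
The promotion above keeps refs in the compact 4-byte form until one arrives that cannot shrink; at that point everything pushed so far is widened once and later pushes go straight to the full vector. A hedged usage sketch (SomeRefs is a hypothetical input sequence):

InternalRefVector Refs;
for (InternalRef R : SomeRefs)
  Refs.push_back(R); // first unshrinkable ref triggers the one-time widening
InternalRefArrayRef Array = Refs; // views whichever storage is active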
+
+} // namespace
+
+Expected<DataRecordHandle> DataRecordHandle::createWithError(
+ function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) {
+ Layout L(I);
+ if (Expected<char *> Mem = Alloc(L.getTotalSize()))
+ return constructImpl(*Mem, I, L);
+ else
+ return Mem.takeError();
+}
+
+DataRecordHandle
+DataRecordHandle::create(function_ref<char *(size_t Size)> Alloc,
+ const Input &I) {
+ Layout L(I);
+ return constructImpl(Alloc(L.getTotalSize()), I, L);
+}
+
+ObjectHandle ObjectHandle::fromFileOffset(FileOffset Offset) {
+ // Store the file offset as it is.
+ assert(!(Offset.get() & 0x1));
+ return ObjectHandle(Offset.get());
+}
+
+ObjectHandle ObjectHandle::fromMemory(uintptr_t Ptr) {
+ // Store the pointer from memory with lowest bit set.
+ assert(!(Ptr & 0x1));
+ return ObjectHandle(Ptr | 1);
+}
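
Both constructors rely on file offsets and pointers being at least 2-byte aligned, which leaves the low bit free as a tag. A hedged sketch of the decode side, mirroring getContentFromHandle() later in this file (function names are illustrative):

#include <cstdint>

static bool isInMemoryHandle(uint64_t OpaqueData) { return OpaqueData & 1; }
static uintptr_t decodePointer(uint64_t OpaqueData) {
  return static_cast<uintptr_t>(OpaqueData & ~uint64_t(1)); // clear tag bit
}
// File-offset handles are stored untagged, so they decode as-is.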
+
+/// Proxy for an on-disk index record.
+struct OnDiskGraphDB::IndexProxy {
+ FileOffset Offset;
+ ArrayRef<uint8_t> Hash;
+ TrieRecord &Ref;
+};
+
+template <size_t N>
+uintptr_t StandaloneDataMap<N>::insert(
+ ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
+ std::unique_ptr<sys::fs::mapped_file_region> Region) {
+ auto &S = getShard(Hash);
+ std::lock_guard<std::mutex> Lock(S.Mutex);
+ auto &V = S.Map[Hash.data()];
+ if (!V)
+ V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK);
+ return reinterpret_cast<uintptr_t>(V.get());
+}
+
+template <size_t N>
+const StandaloneDataInMemory *
+StandaloneDataMap<N>::lookup(ArrayRef<uint8_t> Hash) const {
+ auto &S = getShard(Hash);
+ std::lock_guard<std::mutex> Lock(S.Mutex);
+ auto I = S.Map.find(Hash.data());
+ if (I == S.Map.end())
+ return nullptr;
+ return &*I->second;
+}
+
+namespace {
+
+/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
+/// expensive to register/unregister at this rate.
+///
+/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
+/// files and has a signal handler registered that removes them all.
+class TempFile {
+ bool Done = false;
+ TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {}
+
+public:
+ /// This creates a temporary file with createUniqueFile.
+ static Expected<TempFile> create(const Twine &Model);
+ TempFile(TempFile &&Other) { *this = std::move(Other); }
+ TempFile &operator=(TempFile &&Other) {
+ TmpName = std::move(Other.TmpName);
+ FD = Other.FD;
+ Other.Done = true;
+ Other.FD = -1;
+ return *this;
+ }
+
+ // Name of the temporary file.
+ std::string TmpName;
+
+ // The open file descriptor.
+ int FD = -1;
+
+ // Keep this with the given name.
+ Error keep(const Twine &Name);
+ Error discard();
+
+ // This checks that keep() or discard() was called.
+ ~TempFile() { consumeError(discard()); }
+};
+
+class MappedTempFile {
+public:
+ char *data() const { return Map.data(); }
+ size_t size() const { return Map.size(); }
+
+ Error discard() {
+ assert(Map && "Map already destroyed");
+ Map.unmap();
+ return Temp.discard();
+ }
+
+ Error keep(const Twine &Name) {
+ assert(Map && "Map already destroyed");
+ Map.unmap();
+ return Temp.keep(Name);
+ }
+
+ MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
+ : Temp(std::move(Temp)), Map(std::move(Map)) {}
+
+private:
+ TempFile Temp;
+ sys::fs::mapped_file_region Map;
+};
+} // namespace
+
+Error TempFile::discard() {
+ Done = true;
+ if (FD != -1) {
+ sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
+ if (std::error_code EC = sys::fs::closeFile(File))
+ return errorCodeToError(EC);
+ }
+ FD = -1;
+
+ // Always try to close and remove.
+ std::error_code RemoveEC;
+ if (!TmpName.empty()) {
+ std::error_code EC = sys::fs::remove(TmpName);
+ if (EC)
+ return errorCodeToError(EC);
+ }
+ TmpName = "";
+
+ return Error::success();
+}
+
+Error TempFile::keep(const Twine &Name) {
+ assert(!Done);
+ Done = true;
+ // Always try to close and rename.
+ std::error_code RenameEC = sys::fs::rename(TmpName, Name);
+
+ if (!RenameEC)
+ TmpName = "";
+
+ sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
+ if (std::error_code EC = sys::fs::closeFile(File))
+ return errorCodeToError(EC);
+ FD = -1;
+
+ return errorCodeToError(RenameEC);
+}
+
+Expected<TempFile> TempFile::create(const Twine &Model) {
+ int FD;
+ SmallString<128> ResultPath;
+ if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath))
+ return errorCodeToError(EC);
+
+ TempFile Ret(ResultPath, FD);
+ return std::move(Ret);
+}
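
A hedged sketch of the temp-file lifecycle (the paths are illustrative): create with a unique suffix, write through the descriptor, then atomically publish with keep() or clean up with discard().

// Illustrative only, inside a function returning llvm::Error:
Expected<TempFile> Tmp = TempFile::create("/cas/db/data.%%%%%%");
if (!Tmp)
  return Tmp.takeError();
// ... write the database contents through Tmp->FD ...
if (Error E = Tmp->keep("/cas/db/data.v1")) // rename into final place
  return E;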
+
+bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) {
+ uint64_t ExistingPacked = pack(Existing);
+ uint64_t NewPacked = pack(New);
+ if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
+ return true;
+ Existing = unpack(ExistingPacked);
+ return false;
+}
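
Callers use this in the usual compare-exchange idiom; a hedged sketch rather than a verbatim call site (Record is a TrieRecord reference and PoolOffset a placeholder value):

TrieRecord::Data Existing; // default: StorageKind::Unknown, packs to 0
TrieRecord::Data New{TrieRecord::StorageKind::DataPool, PoolOffset};
if (!Record.compare_exchange_strong(Existing, New)) {
  // Lost the race: Existing now holds the winning thread's Data, and the
  // caller can accept it if it names valid storage.
}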
+
+DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) {
+ return constructImpl(Mem, I, Layout(I));
+}
+
+Expected<DataRecordHandle>
+DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool,
+ FileOffset Offset) {
+ auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header));
+ if (!HeaderData)
+ return HeaderData.takeError();
+
+ auto Record = DataRecordHandle::get(HeaderData->data());
+ if (Record.getTotalSize() + Offset.get() > Pool.size())
+ return createStringError(
+ make_error_code(std::errc::illegal_byte_sequence),
+ "data record span past the end of the data pool");
+
+ return Record;
+}
+
+DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I,
+ const Layout &L) {
+ char *Next = Mem + sizeof(Header);
+
+ // Fill in Packed and set other data, then come back to construct the header.
+ Header::PackTy Packed = 0;
+ Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;
+
+ // Construct DataSize.
+ switch (L.Flags.DataSize) {
+ case DataSizeFlags::Uses1B:
+ assert(I.Data.size() <= UINT8_MAX);
+ Packed |= (Header::PackTy)I.Data.size()
+ << ((sizeof(Packed) - 2) * CHAR_BIT);
+ break;
+ case DataSizeFlags::Uses2B:
+ assert(I.Data.size() <= UINT16_MAX);
+ Packed |= (Header::PackTy)I.Data.size()
+ << ((sizeof(Packed) - 4) * CHAR_BIT);
+ break;
+ case DataSizeFlags::Uses4B:
+ support::endian::write32le(Next, I.Data.size());
+ Next += 4;
+ break;
+ case DataSizeFlags::Uses8B:
+ support::endian::write64le(Next, I.Data.size());
+ Next += 8;
+ break;
+ }
+
+ // Construct NumRefs.
+ //
+ // NOTE: May be writing NumRefs even if there are zero refs in order to fix
+ // alignment.
+ switch (L.Flags.NumRefs) {
+ case NumRefsFlags::Uses0B:
+ break;
+ case NumRefsFlags::Uses1B:
+ assert(I.Refs.size() <= UINT8_MAX);
+ Packed |= (Header::PackTy)I.Refs.size()
+ << ((sizeof(Packed) - 2) * CHAR_BIT);
+ break;
+ case NumRefsFlags::Uses2B:
+ assert(I.Refs.size() <= UINT16_MAX);
+ Packed |= (Header::PackTy)I.Refs.size()
+ << ((sizeof(Packed) - 4) * CHAR_BIT);
+ break;
+ case NumRefsFlags::Uses4B:
+ support::endian::write32le(Next, I.Refs.size());
+ Next += 4;
+ break;
+ case NumRefsFlags::Uses8B:
+ support::endian::write64le(Next, I.Refs.size());
+ Next += 8;
+ break;
+ }
+
+ // Construct Refs[].
+ if (!I.Refs.empty()) {
+ assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B());
+ ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer();
+ llvm::copy(RefsBuffer, Next);
+ Next += RefsBuffer.size();
+ }
+
+ // Construct Data and the trailing null.
+ assert(isAddrAligned(Align(8), Next));
+ llvm::copy(I.Data, Next);
+ Next[I.Data.size()] = 0;
+
+ // Construct the header itself and return.
+ Header *H = new (Mem) Header{Packed};
+ DataRecordHandle Record(*H);
+ assert(Record.getData() == I.Data);
+ assert(Record.getNumRefs() == I.Refs.size());
+ assert(Record.getRefs() == I.Refs);
+ assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize);
+ assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs);
+ assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind);
+ return Record;
+}
+
+DataRecordHandle::Layout::Layout(const Input &I) {
+ // Start initial relative offsets right after the Header.
+ uint64_t RelOffset = sizeof(Header);
+
+ // Initialize the easy stuff.
+ DataSize = I.Data.size();
+ NumRefs = I.Refs.size();
+
+ // Check refs size.
+ Flags.RefKind =
+ I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;
+
+ // Find the smallest slot available for DataSize.
+ bool Has1B = true;
+ bool Has2B = true;
+ if (DataSize <= UINT8_MAX && Has1B) {
+ Flags.DataSize = DataSizeFlags::Uses1B;
+ Has1B = false;
+ } else if (DataSize <= UINT16_MAX && Has2B) {
+ Flags.DataSize = DataSizeFlags::Uses2B;
+ Has2B = false;
+ } else if (DataSize <= UINT32_MAX) {
+ Flags.DataSize = DataSizeFlags::Uses4B;
+ RelOffset += 4;
+ } else {
+ Flags.DataSize = DataSizeFlags::Uses8B;
+ RelOffset += 8;
+ }
+
+ // Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
+ if (!NumRefs) {
+ Flags.NumRefs = NumRefsFlags::Uses0B;
+ } else if (NumRefs <= UINT8_MAX && Has1B) {
+ Flags.NumRefs = NumRefsFlags::Uses1B;
+ Has1B = false;
+ } else if (NumRefs <= UINT16_MAX && Has2B) {
+ Flags.NumRefs = NumRefsFlags::Uses2B;
+ Has2B = false;
+ } else {
+ Flags.NumRefs = NumRefsFlags::Uses4B;
+ RelOffset += 4;
+ }
+
+ // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
+ // padding rules when reading and writing. This also bumps RelOffset.
+ //
+ // The value for NumRefs is strictly limited to UINT32_MAX, but it can be
+ // stored as 8B. This means we can *always* find a size to grow.
+ //
+ // NOTE: Only call this once.
+ auto GrowSizeFieldsBy4B = [&]() {
+ assert(isAligned(Align(4), RelOffset));
+ RelOffset += 4;
+
+ assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
+ "Expected to be able to grow NumRefs8B");
+
+ // First try to grow DataSize. NumRefs will not (yet) be 8B, and if
+ // DataSize is upgraded to 8B it'll already be aligned.
+ //
+ // Failing that, grow NumRefs.
+ if (Flags.DataSize < DataSizeFlags::Uses4B)
+ Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B.
+ else if (Flags.DataSize < DataSizeFlags::Uses8B)
+ Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B.
+ else if (Flags.NumRefs < NumRefsFlags::Uses4B)
+ Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B.
+ else
+ Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B.
+ };
+
+ assert(isAligned(Align(4), RelOffset));
+ if (Flags.RefKind == RefKindFlags::InternalRef) {
+ // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
+ // without padding.
+ if (!isAligned(Align(8), RelOffset))
+ GrowSizeFieldsBy4B();
+
+ assert(isAligned(Align(8), RelOffset));
+ RefsRelOffset = RelOffset;
+ RelOffset += 8 * NumRefs;
+ } else {
+ // The array of 4B refs doesn't need 8B alignment, but the data will need
+ // to be 8B-aligned. Detect this now, and, if necessary, shift everything
+ // by 4B by growing one of the sizes.
+ // If we remove the need for 8B alignment of the data, there is <1% savings
+ // in disk storage for a clang build using MCCAS, but the 8B alignment may be
+ // useful in the future, so keep it for now.
+ uint64_t RefListSize = 4 * NumRefs;
+ if (!isAligned(Align(8), RelOffset + RefListSize))
+ GrowSizeFieldsBy4B();
+ RefsRelOffset = RelOffset;
+ RelOffset += RefListSize;
+ }
+
+ assert(isAligned(Align(8), RelOffset));
+ DataRelOffset = RelOffset;
+}
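
A worked example of this constructor under stated assumptions: a record with two 4-byte refs and 10 bytes of data.

// RelOffset starts at 4 (Header). DataSize = 10 takes the 1B slot; NumRefs = 2
// then takes the 2B slot, so RelOffset is still 4. With RefKind InternalRef4B,
// RefListSize = 8 and 4 + 8 = 12 is not 8B-aligned, so GrowSizeFieldsBy4B()
// upgrades DataSize to a 4B field and bumps RelOffset to 8. Refs then occupy
// bytes 8..16, data occupies 16..26, and getTotalSize() = 27, counting the
// trailing null byte.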
+
+uint64_t DataRecordHandle::getDataSize() const {
+ int64_t RelOffset = sizeof(Header);
+ auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset;
+ switch (getLayoutFlags().DataSize) {
+ case DataSizeFlags::Uses1B:
+ return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
+ case DataSizeFlags::Uses2B:
+ return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
+ UINT16_MAX;
+ case DataSizeFlags::Uses4B:
+ return support::endian::read32le(DataSizePtr);
+ case DataSizeFlags::Uses8B:
+ return support::endian::read64le(DataSizePtr);
+ }
+}
+
+void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
+ if (LF.DataSize >= DataSizeFlags::Uses4B)
+ RelOffset += 4;
+ if (LF.DataSize >= DataSizeFlags::Uses8B)
+ RelOffset += 4;
+}
+
+uint32_t DataRecordHandle::getNumRefs() const {
+ LayoutFlags LF = getLayoutFlags();
+ int64_t RelOffset = sizeof(Header);
+ skipDataSize(LF, RelOffset);
+ auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset;
+ switch (LF.NumRefs) {
+ case NumRefsFlags::Uses0B:
+ return 0;
+ case NumRefsFlags::Uses1B:
+ return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
+ case NumRefsFlags::Uses2B:
+ return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
+ UINT16_MAX;
+ case NumRefsFlags::Uses4B:
+ return support::endian::read32le(NumRefsPtr);
+ case NumRefsFlags::Uses8B:
+ return support::endian::read64le(NumRefsPtr);
+ }
+}
+
+void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
+ if (LF.NumRefs >= NumRefsFlags::Uses4B)
+ RelOffset += 4;
+ if (LF.NumRefs >= NumRefsFlags::Uses8B)
+ RelOffset += 4;
+}
+
+int64_t DataRecordHandle::getRefsRelOffset() const {
+ LayoutFlags LF = getLayoutFlags();
+ int64_t RelOffset = sizeof(Header);
+ skipDataSize(LF, RelOffset);
+ skipNumRefs(LF, RelOffset);
+ return RelOffset;
+}
+
+int64_t DataRecordHandle::getDataRelOffset() const {
+ LayoutFlags LF = getLayoutFlags();
+ int64_t RelOffset = sizeof(Header);
+ skipDataSize(LF, RelOffset);
+ skipNumRefs(LF, RelOffset);
+ uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
+ RelOffset += RefSize * getNumRefs();
+ return RelOffset;
+}
+
+Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
+ return Index.validate([&](FileOffset Offset,
+ OnDiskTrieRawHashMap::ConstValueProxy Record)
+ -> Error {
+ auto formatError = [&](Twine Msg) {
+ return createStringError(
+ llvm::errc::illegal_byte_sequence,
+ "bad record at 0x" +
+ utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
+ Msg.str());
+ };
+
+ if (Record.Data.size() != sizeof(TrieRecord))
+ return formatError("wrong data record size");
+ if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
+ return formatError("wrong data record alignment");
+
+ auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
+ TrieRecord::Data D = R->load();
+ std::unique_ptr<MemoryBuffer> FileBuffer;
+ if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
+ (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
+ (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
+ (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
+ (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
+ return formatError("invalid record kind value");
+
+ auto Ref = InternalRef::getFromOffset(Offset);
+ auto I = getIndexProxyFromRef(Ref);
+ if (!I)
+ return I.takeError();
+
+ switch (D.SK) {
+ case TrieRecord::StorageKind::Unknown:
+ // This could be an abandoned entry due to a termination before updating
+ // the record. It can be reused by a later insertion, so just skip this
+ // entry for now.
+ return Error::success();
+ case TrieRecord::StorageKind::DataPool:
+ // Check that the offset is a positive value, and large enough to hold the
+ // header for the data record.
+ if (D.Offset.get() <= 0 ||
+ (uint64_t)D.Offset.get() + sizeof(DataRecordHandle::Header) >=
+ DataPool.size())
+ return formatError("datapool record out of bounds");
+ break;
+ case TrieRecord::StorageKind::Standalone:
+ case TrieRecord::StorageKind::StandaloneLeaf:
+ case TrieRecord::StorageKind::StandaloneLeaf0:
+ SmallString<256> Path;
+ getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
+ // If we need to validate the contents of the file later, load the buffer
+ // here. Otherwise, just check the existence of the file.
+ if (Deep) {
+ auto File = MemoryBuffer::getFile(Path, /*IsText=*/false,
+ /*RequiresNullTerminator=*/false);
+ if (!File || !*File)
+ return formatError("record file \'" + Path + "\' does not exist");
+
+ FileBuffer = std::move(*File);
+ } else if (!llvm::sys::fs::exists(Path))
+ return formatError("record file \'" + Path + "\' does not exist");
+ }
+
+ if (!Deep)
+ return Error::success();
+
+ auto dataError = [&](Twine Msg) {
+ return createStringError(llvm::errc::illegal_byte_sequence,
+ "bad data for digest \'" + toHex(I->Hash) +
+ "\': " + Msg.str());
+ };
+ SmallVector<ArrayRef<uint8_t>> Refs;
+ ArrayRef<char> StoredData;
+
+ switch (D.SK) {
+ case TrieRecord::StorageKind::Unknown:
+ llvm_unreachable("already handled");
+ case TrieRecord::StorageKind::DataPool: {
+ auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset);
+ if (!DataRecord)
+ return dataError(toString(DataRecord.takeError()));
+
+ for (auto InternRef : DataRecord->getRefs()) {
+ auto Index = getIndexProxyFromRef(InternRef);
+ if (!Index)
+ return Index.takeError();
+ Refs.push_back(Index->Hash);
+ }
+ StoredData = DataRecord->getData();
+ break;
+ }
+ case TrieRecord::StorageKind::Standalone: {
+ if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header))
+ return dataError("data record is not big enough to read the header");
+ auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
+ if (DataRecord.getTotalSize() > FileBuffer->getBufferSize())
+ return dataError(
+ "data record span past the end of the standalone file");
+ for (auto InternRef : DataRecord.getRefs()) {
+ auto Index = getIndexProxyFromRef(InternRef);
+ if (!Index)
+ return Index.takeError();
+ Refs.push_back(Index->Hash);
+ }
+ StoredData = DataRecord.getData();
+ break;
+ }
+ case TrieRecord::StorageKind::StandaloneLeaf:
+ case TrieRecord::StorageKind::StandaloneLeaf0: {
+ StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer());
+ if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
+ if (!FileBuffer->getBuffer().ends_with('\0'))
+ return dataError("standalone file is not zero terminated");
+ StoredData = StoredData.drop_back(1);
+ }
+ break;
+ }
+ }
+
+ SmallVector<uint8_t> ComputedHash;
+ Hasher(Refs, StoredData, ComputedHash);
+ if (I->Hash != ArrayRef(ComputedHash))
+ return dataError("hash mismatch, got \'" + toHex(ComputedHash) +
+ "\' instead");
+
+ return Error::success();
+ });
+}
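
A hedged sketch of driving validate(); the hasher below is a stand-in with the shape the callback above expects, not a real digest function:

// Illustrative only: real callers plug in the CAS's actual hash function.
auto DummyHasher = [](ArrayRef<ArrayRef<uint8_t>> Refs, ArrayRef<char> Data,
                      SmallVectorImpl<uint8_t> &Hash) {
  Hash.assign(20, 0); // placeholder digest; a real hasher mixes Refs and Data
};
if (Error E = DB.validate(/*Deep=*/true, DummyHasher))
  llvm::report_fatal_error(std::move(E));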
+
+void OnDiskGraphDB::print(raw_ostream &OS) const {
+ OS << "on-disk-root-path: " << RootPath << "\n";
+
+ struct PoolInfo {
+ uint64_t Offset;
+ };
+ SmallVector<PoolInfo> Pool;
+
+ OS << "\n";
+ OS << "index:\n";
+ Index.print(OS, [&](ArrayRef<char> Data) {
+ assert(Data.size() == sizeof(TrieRecord));
+ assert(isAligned(Align::Of<TrieRecord>(), Data.size()));
+ auto *R = reinterpret_cast<const TrieRecord *>(Data.data());
+ TrieRecord::Data D = R->load();
+ OS << " SK=";
+ switch (D.SK) {
+ case TrieRecord::StorageKind::Unknown:
+ OS << "unknown ";
+ break;
+ case TrieRecord::StorageKind::DataPool:
+ OS << "datapool ";
+ Pool.push_back({D.Offset.get()});
+ break;
+ case TrieRecord::StorageKind::Standalone:
+ OS << "standalone-data ";
+ break;
+ case TrieRecord::StorageKind::StandaloneLeaf:
+ OS << "standalone-leaf ";
+ break;
+ case TrieRecord::StorageKind::StandaloneLeaf0:
+ OS << "standalone-leaf+0";
+ break;
+ }
+ OS << " Offset=" << (void *)D.Offset.get();
+ });
+ if (Pool.empty())
+ return;
+
+ OS << "\n";
+ OS << "pool:\n";
+ llvm::sort(
+ Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; });
+ for (PoolInfo PI : Pool) {
+ OS << "- addr=" << (void *)PI.Offset << " ";
+ auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset));
+ if (!D) {
+ OS << "error: " << toString(D.takeError());
+ return;
+ }
+
+ OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize()
+ << " size=" << D->getTotalSize()
+ << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n";
+ }
+}
+
+Expected<OnDiskGraphDB::IndexProxy>
+OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) {
+ auto P = Index.insertLazy(
+ Hash, [](FileOffset TentativeOffset,
+ OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
+ assert(TentativeValue.Data.size() == sizeof(TrieRecord));
+ assert(
+ isAddrAligned(Align::Of<TrieRecord>(), TentativeValue.Data.data()));
+ new (TentativeValue.Data.data()) TrieRecord();
+ });
+ if (LLVM_UNLIKELY(!P))
+ return P.takeError();
+
+ assert(*P && "Expected insertion");
+ return getIndexProxyFromPointer(*P);
+}
+
+OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
+ OnDiskTrieRawHashMap::ConstOnDiskPtr P) const {
+ assert(P);
+ assert(P.getOffset());
+ return IndexProxy{P.getOffset(), P->Hash,
+ *const_cast<TrieRecord *>(
+ reinterpret_cast<const TrieRecord *>(P->Data.data()))};
+}
+
+Expected<ObjectID> OnDiskGraphDB::getReference(ArrayRef<uint8_t> Hash) {
+ auto I = indexHash(Hash);
+ if (LLVM_UNLIKELY(!I))
+ return I.takeError();
+ return getExternalReference(*I);
+}
+
+ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
+ return getExternalReference(makeInternalRef(I.Offset));
+}
+
+std::optional<ObjectID>
+OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest) {
+ auto tryUpstream =
+ [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
+ if (!UpstreamDB)
+ return std::nullopt;
+ std::optional<ObjectID> UpstreamID =
+ UpstreamDB->getExistingReference(Digest);
+ if (LLVM_UNLIKELY(!UpstreamID))
+ return std::nullopt;
+ auto Ref = expectedToOptional(indexHash(Digest));
+ if (!Ref)
+ return std::nullopt;
+ if (!I)
+ I.emplace(*Ref);
+ return getExternalReference(*I);
+ };
+
+ OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest);
+ if (!P)
+ return tryUpstream(std::nullopt);
+ IndexProxy I = getIndexProxyFromPointer(P);
+ TrieRecord::Data Obj = I.Ref.load();
+ if (Obj.SK == TrieRecord::StorageKind::Unknown)
+ return tryUpstream(I);
+ return getExternalReference(makeInternalRef(I.Offset));
+}
+
+Expected<OnDiskGraphDB::IndexProxy>
+OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
+ auto P = Index.recoverFromFileOffset(Ref.getFileOffset());
+ if (LLVM_UNLIKELY(!P))
+ return P.takeError();
+ return getIndexProxyFromPointer(*P);
+}
+
+Expected<ArrayRef<uint8_t>> OnDiskGraphDB::getDigest(InternalRef Ref) const {
+ auto I = getIndexProxyFromRef(Ref);
+ if (!I)
+ return I.takeError();
+ return I->Hash;
+}
+
+ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const {
+ return I.Hash;
+}
+
+static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool,
+ ObjectHandle OH) {
+ // Decode ObjectHandle to locate the stored content.
+ uint64_t Data = OH.getOpaqueData();
+ if (Data & 1) {
+ const auto *SDIM =
+ reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1));
+ return SDIM->getContent();
+ }
+
+ auto DataHandle =
+ cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data)));
+ assert(DataHandle.getData().end()[0] == 0 && "Null termination");
+ return OnDiskContent{DataHandle, std::nullopt};
+}
+
+ArrayRef<char> OnDiskGraphDB::getObjectData(ObjectHandle Node) const {
+ OnDiskContent Content = getContentFromHandle(DataPool, Node);
+ if (Content.Bytes)
+ return *Content.Bytes;
+ assert(Content.Record && "Expected record or bytes");
+ return Content.Record->getData();
+}
+
+InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const {
+ if (std::optional<DataRecordHandle> Record =
+ getContentFromHandle(DataPool, Node).Record)
+ return Record->getRefs();
+ return std::nullopt;
+}
+
+Expected<std::optional<ObjectHandle>>
+OnDiskGraphDB::load(ObjectID ExternalRef) {
+ InternalRef Ref = getInternalRef(ExternalRef);
+ auto I = getIndexProxyFromRef(Ref);
+ if (!I)
+ return I.takeError();
+ TrieRecord::Data Object = I->Ref.load();
+
+ if (Object.SK == TrieRecord::StorageKind::Unknown) {
+ if (!UpstreamDB)
+ return std::nullopt;
+ return faultInFromUpstream(ExternalRef);
+ }
+
+ if (Object.SK == TrieRecord::StorageKind::DataPool)
+ return ObjectHandle::fromFileOffset(Object.Offset);
+
+ // Only TrieRecord::StorageKind::Standalone (and variants) need to be
+ // explicitly loaded.
+ //
+ // There's corruption if standalone objects have offsets, or if we get here
+ // for something that isn't standalone.
+ if (Object.Offset)
+ return createCorruptObjectError(getDigest(*I));
+ switch (Object.SK) {
+ case TrieRecord::StorageKind::Unknown:
+ case TrieRecord::StorageKind::DataPool:
+ llvm_unreachable("unexpected storage kind");
+ case TrieRecord::StorageKind::Standalone:
+ case TrieRecord::StorageKind::StandaloneLeaf0:
+ case TrieRecord::StorageKind::StandaloneLeaf:
+ break;
+ }
+
+ // Load it from disk.
+ //
+ // Note: Creation logic guarantees that data that needs null-termination is
+ // suitably 0-padded. Requiring null-termination here would be too expensive
+ // for extremely large objects that happen to be page-aligned.
+ SmallString<256> Path;
+ getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path);
+
+ auto File = sys::fs::openNativeFileForRead(Path);
+ if (!File)
+ return createFileError(Path, File.takeError());
+
+ auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); });
+
+ sys::fs::file_status Status;
+ if (std::error_code EC = sys::fs::status(*File, Status))
+ return createCorruptObjectError(getDigest(*I));
+
+ std::error_code EC;
+ auto Region = std::make_unique<sys::fs::mapped_file_region>(
+ *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC);
+ if (EC)
+ return createCorruptObjectError(getDigest(*I));
+
+ return ObjectHandle::fromMemory(
+ static_cast<StandaloneDataMapTy *>(StandaloneData)
+ ->insert(I->Hash, Object.SK, std::move(Region)));
+}
+
+Expected<bool> OnDiskGraphDB::isMaterialized(ObjectID Ref) {
+ auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true);
+ if (!Presence)
+ return Presence.takeError();
+
+ switch (*Presence) {
+ case ObjectPresence::Missing:
+ return false;
+ case ObjectPresence::InPrimaryDB:
+ return true;
+ case ObjectPresence::OnlyInUpstreamDB:
+ if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult)
+ return FaultInResult.takeError();
+ return true;
+ }
+}
+
+Expected<OnDiskGraphDB::ObjectPresence>
+OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
+ bool CheckUpstream) const {
+ InternalRef Ref = getInternalRef(ExternalRef);
+ auto I = getIndexProxyFromRef(Ref);
+ if (!I)
+ return I.takeError();
+
+ TrieRecord::Data Object = I->Ref.load();
+ if (Object.SK != TrieRecord::StorageKind::Unknown)
+ return ObjectPresence::InPrimaryDB;
+ if (!CheckUpstream || !UpstreamDB)
+ return ObjectPresence::Missing;
+ std::optional<ObjectID> UpstreamID =
+ UpstreamDB->getExistingReference(getDigest(*I));
+ return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
+ : ObjectPresence::Missing;
+}
+
+InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) {
+ return InternalRef::getFromOffset(IndexOffset);
+}
+
+void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I,
+ SmallVectorImpl<char> &Path) const {
+ Path.assign(RootPath.begin(), RootPath.end());
+ sys::path::append(Path,
+ Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion);
+}
+
+OnDiskContent StandaloneDataInMemory::getContent() const {
+ bool Leaf0 = false;
+ bool Leaf = false;
+ switch (SK) {
+ default:
+ llvm_unreachable("Storage kind must be standalone");
+ case TrieRecord::StorageKind::Standalone:
+ break;
+ case TrieRecord::StorageKind::StandaloneLeaf0:
+ Leaf = Leaf0 = true;
+ break;
+ case TrieRecord::StorageKind::StandaloneLeaf:
+ Leaf = true;
+ break;
+ }
+
+ if (Leaf) {
+ StringRef Data(Region->data(), Region->size());
+ assert(Data.drop_back(Leaf0).end()[0] == 0 &&
+ "Standalone node data missing null termination");
+ return OnDiskContent{std::nullopt,
+ arrayRefFromStringRef<char>(Data.drop_back(Leaf0))};
+ }
+
+ DataRecordHandle Record = DataRecordHandle::get(Region->data());
+ assert(Record.getData().end()[0] == 0 &&
+ "Standalone object record missing null termination for data");
+ return OnDiskContent{Record, std::nullopt};
+}
+
+static Expected<MappedTempFile> createTempFile(StringRef FinalPath,
+ uint64_t Size) {
+ assert(Size && "Unexpected request for an empty temp file");
+ Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%");
+ if (!File)
+ return File.takeError();
+
+ if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
+ return createFileError(File->TmpName, std::move(E));
+
+ if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
+ return createFileError(File->TmpName, EC);
+
+ std::error_code EC;
+ sys::fs::mapped_file_region Map(sys::fs::convertFDToNativeFile(File->FD),
+ sys::fs::mapped_file_region::readwrite, Size,
+ 0, EC);
+ if (EC)
+ return createFileError(File->TmpName, EC);
+ return MappedTempFile(std::move(*File), std::move(Map));
+}
+
+static size_t getPageSize() {
+ static int PageSize = sys::Process::getPageSizeEstimate();
+ return PageSize;
+}
+
+Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
+ assert(Data.size() > TrieRecord::MaxEmbeddedSize &&
+ "Expected a bigger file for external content...");
+
+ bool Leaf0 = isAligned(Align(getPageSize()), Data.size());
+ TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0
+ : TrieRecord::StorageKind::StandaloneLeaf;
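+  // For example (illustrative, assuming a 4096-byte page): a 4096-byte blob is
+  // page-aligned, so it is stored as StandaloneLeaf0 with one extra zero byte
+  // appended (FileSize becomes 4097); a 4095-byte blob is stored as
+  // StandaloneLeaf, relying on the zero-filled tail of the final mapped page
+  // for null-termination.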
+
+ SmallString<256> Path;
+ int64_t FileSize = Data.size() + Leaf0;
+ getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path);
+
+ // Write the file. Don't reuse this mapped_file_region, which is read/write.
+ // Let load() pull up one that's read-only.
+ Expected<MappedTempFile> File = createTempFile(Path, FileSize);
+ if (!File)
+ return File.takeError();
+ assert(File->size() == (uint64_t)FileSize);
+ llvm::copy(Data, File->data());
+ if (Leaf0)
+ File->data()[Data.size()] = 0;
+ assert(File->data()[Data.size()] == 0);
+ if (Error E = File->keep(Path))
+ return E;
+
+ // Store the object reference.
+ TrieRecord::Data Existing;
+ {
+ TrieRecord::Data Leaf{SK, FileOffset()};
+ if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
+ recordStandaloneSizeIncrease(FileSize);
+ return Error::success();
+ }
+ }
+
+ // If there was a race, confirm that the new value has valid storage.
+ if (Existing.SK == TrieRecord::StorageKind::Unknown)
+ return createCorruptObjectError(getDigest(I));
+
+ return Error::success();
+}
+
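+// Example use of the store()/load() API (a minimal sketch; `Digest` and `Data`
+// are placeholders and error handling is elided):
+// \code
+//   Expected<ObjectID> ID = DB.getReference(Digest);
+//   if (Error E = DB.store(*ID, /*Refs=*/{}, Data))
+//     return E;
+//   Expected<std::optional<ObjectHandle>> H = DB.load(*ID);
+// \endcode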
+Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
+ ArrayRef<char> Data) {
+ auto I = getIndexProxyFromRef(getInternalRef(ID));
+ if (LLVM_UNLIKELY(!I))
+ return I.takeError();
+
+  // Early return if the node already exists.
+ {
+ TrieRecord::Data Existing = I->Ref.load();
+ if (Existing.SK != TrieRecord::StorageKind::Unknown)
+ return Error::success();
+ }
+
+ // Big leaf nodes.
+ if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize)
+ return createStandaloneLeaf(*I, Data);
+
+ // TODO: Check whether it's worth checking the index for an already existing
+ // object (like storeTreeImpl() does) before building up the
+ // InternalRefVector.
+ InternalRefVector InternalRefs;
+ for (ObjectID Ref : Refs)
+ InternalRefs.push_back(getInternalRef(Ref));
+
+ // Create the object.
+
+ DataRecordHandle::Input Input{InternalRefs, Data};
+
+ // Compute the storage kind, allocate it, and create the record.
+ TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
+ FileOffset PoolOffset;
+ SmallString<256> Path;
+ std::optional<MappedTempFile> File;
+ std::optional<uint64_t> FileSize;
+ auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
+ getStandalonePath(TrieRecord::getStandaloneFilePrefix(
+ TrieRecord::StorageKind::Standalone),
+ *I, Path);
+ if (Error E = createTempFile(Path, Size).moveInto(File))
+ return std::move(E);
+ assert(File->size() == Size);
+ FileSize = Size;
+ SK = TrieRecord::StorageKind::Standalone;
+ return File->data();
+ };
+ auto Alloc = [&](size_t Size) -> Expected<char *> {
+ if (Size <= TrieRecord::MaxEmbeddedSize) {
+ SK = TrieRecord::StorageKind::DataPool;
+ auto P = DataPool.allocate(Size);
+ if (LLVM_UNLIKELY(!P)) {
+ char *NewAlloc = nullptr;
+ auto NewE = handleErrors(
+ P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
+ if (E->convertToErrorCode() == std::errc::not_enough_memory)
+ return AllocStandaloneFile(Size).moveInto(NewAlloc);
+ return Error(std::move(E));
+ });
+ if (!NewE)
+ return NewAlloc;
+ return std::move(NewE);
+ }
+ PoolOffset = P->getOffset();
+ LLVM_DEBUG({
+ dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
+ << " size=" << Size
+ << " end=" << (void *)(PoolOffset.get() + Size) << "\n";
+ });
+ return (*P)->data();
+ }
+ return AllocStandaloneFile(Size);
+ };
+
+ DataRecordHandle Record;
+ if (Error E =
+ DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
+ return E;
+ assert(Record.getData().end()[0] == 0 && "Expected null-termination");
+ assert(Record.getData() == Input.Data && "Expected initialization");
+ assert(SK != TrieRecord::StorageKind::Unknown);
+ assert(bool(File) != bool(PoolOffset) &&
+ "Expected either a mapped file or a pooled offset");
+
+ // Check for a race before calling MappedTempFile::keep().
+ //
+ // Then decide what to do with the file. Better to discard than overwrite if
+ // another thread/process has already added this.
+ TrieRecord::Data Existing = I->Ref.load();
+ {
+ TrieRecord::Data NewObject{SK, PoolOffset};
+ if (File) {
+ if (Existing.SK == TrieRecord::StorageKind::Unknown) {
+ // Keep the file!
+ if (Error E = File->keep(Path))
+ return E;
+ } else {
+ File.reset();
+ }
+ }
+
+ // If we didn't already see a racing/existing write, then try storing the
+ // new object. If that races, confirm that the new value has valid storage.
+ //
+ // TODO: Find a way to reuse the storage from the new-but-abandoned record
+ // handle.
+ if (Existing.SK == TrieRecord::StorageKind::Unknown) {
+ if (I->Ref.compare_exchange_strong(Existing, NewObject)) {
+ if (FileSize)
+ recordStandaloneSizeIncrease(*FileSize);
+ return Error::success();
+ }
+ }
+ }
+
+ if (Existing.SK == TrieRecord::StorageKind::Unknown)
+ return createCorruptObjectError(getDigest(*I));
+
+ // Load existing object.
+ return Error::success();
+}
+
+void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
+ standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
+}
+
+std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const {
+ MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
+ assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
+ assert(isAddrAligned(Align(8), UserHeader.data()));
+ return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
+}
+
+uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
+ return standaloneStorageSize().load(std::memory_order_relaxed);
+}
+
+size_t OnDiskGraphDB::getStorageSize() const {
+ return Index.size() + DataPool.size() + getStandaloneStorageSize();
+}
+
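+// Returns the utilization of whichever table is closer to its hard limit. For
+// example (illustrative): an index at 6 GB of a 12 GB capacity (50%) and a
+// data pool at 18 GB of 24 GB (75%) report 75.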
+unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {
+ unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
+ unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
+ return std::max(IndexPercent, DataPercent);
+}
+
+Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
+ StringRef AbsPath, StringRef HashName, unsigned HashByteSize,
+ std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) {
+ if (std::error_code EC = sys::fs::create_directories(AbsPath))
+ return createFileError(AbsPath, EC);
+
+ constexpr uint64_t MB = 1024ull * 1024ull;
+ constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
+
+ uint64_t MaxIndexSize = 12 * GB;
+ uint64_t MaxDataPoolSize = 24 * GB;
+
+ if (useSmallMappingSize(AbsPath)) {
+ MaxIndexSize = 1 * GB;
+ MaxDataPoolSize = 2 * GB;
+ }
+
+ auto CustomSize = getOverriddenMaxMappingSize();
+ if (!CustomSize)
+ return CustomSize.takeError();
+ if (*CustomSize)
+ MaxIndexSize = MaxDataPoolSize = **CustomSize;
+
+ SmallString<256> IndexPath(AbsPath);
+ sys::path::append(IndexPath, IndexFilePrefix + CASFormatVersion);
+ std::optional<OnDiskTrieRawHashMap> Index;
+ if (Error E = OnDiskTrieRawHashMap::create(
+ IndexPath, IndexTableName + "[" + HashName + "]",
+ HashByteSize * CHAR_BIT,
+ /*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
+ /*MinFileSize=*/MB)
+ .moveInto(Index))
+ return std::move(E);
+
+ uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);
+
+ SmallString<256> DataPoolPath(AbsPath);
+ sys::path::append(DataPoolPath, DataPoolFilePrefix + CASFormatVersion);
+ std::optional<OnDiskDataAllocator> DataPool;
+ StringRef PolicyName =
+ Policy == FaultInPolicy::SingleNode ? "single" : "full";
+ if (Error E = OnDiskDataAllocator::create(
+ DataPoolPath,
+ DataPoolTableName + "[" + HashName + "]" + PolicyName,
+ MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize,
+ [](void *UserHeaderPtr) {
+ new (UserHeaderPtr) std::atomic<uint64_t>(0);
+ })
+ .moveInto(DataPool))
+ return std::move(E);
+ if (DataPool->getUserHeader().size() != UserHeaderSize)
+ return createStringError(llvm::errc::argument_out_of_domain,
+ "unexpected user header in '" + DataPoolPath +
+ "'");
+
+ return std::unique_ptr<OnDiskGraphDB>(
+ new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),
+ std::move(UpstreamDB), Policy));
+}
+
+OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
+ OnDiskDataAllocator DataPool,
+ std::unique_ptr<OnDiskGraphDB> UpstreamDB,
+ FaultInPolicy Policy)
+ : Index(std::move(Index)), DataPool(std::move(DataPool)),
+ RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)),
+ FIPolicy(Policy) {
+ /// Lifetime for "big" objects not in DataPool.
+ ///
+ /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
+ /// simpler on the assumption there won't be much contention since most data
+  /// is not big. If there is contention, and we've already made ObjectProxy
+  /// object handles cheap enough to use consistently, the fix might be to
+  /// make better use of them rather than to optimize this map.
+ ///
+ /// FIXME: Figure out the right number of shards, if any.
+ StandaloneData = new StandaloneDataMapTy();
+}
+
+OnDiskGraphDB::~OnDiskGraphDB() {
+ delete static_cast<StandaloneDataMapTy *>(StandaloneData);
+}
+
+Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
+ ObjectHandle UpstreamNode) {
+  // Copies the full CAS tree from upstream. Uses depth-first copying to
+  // protect against the process dying mid-import and leaving the database with
+  // an incomplete tree. Note that if the upstream has missing nodes then the
+  // tree is copied with the same nodes missing; this is not considered an
+  // error.
+
+ struct UpstreamCursor {
+ ObjectHandle Node;
+ size_t RefsCount;
+ object_refs_iterator RefI;
+ object_refs_iterator RefE;
+ };
+  /// Keeps track of the visitation state for the current node and all of its
+  /// parents.
+ SmallVector<UpstreamCursor, 16> CursorStack;
+  /// Keeps track of the currently visited nodes as they are imported into the
+  /// primary database, for the current node and its parents. When a node is
+  /// entered for visitation it appends its own ID, then appends referenced IDs
+  /// as they get imported. When a node is fully imported, its referenced IDs
+  /// are popped off the top of the stack, leaving its own ID on top as one of
+  /// the referenced IDs for the parent node.
+ SmallVector<ObjectID, 128> PrimaryNodesStack;
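+  // For example (illustrative): while importing A -> {B, C}, A's ID is pushed
+  // first ([A]); once B and C are imported the stack is [A, B, C]; when A
+  // completes, truncating by RefsCount = 2 leaves [A], which then serves as a
+  // referenced ID for A's parent.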
+
+ auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) {
+ PrimaryNodesStack.push_back(PrimaryID);
+ if (!Node)
+ return;
+ auto Refs = UpstreamDB->getObjectRefs(*Node);
+ CursorStack.push_back({*Node,
+ (size_t)std::distance(Refs.begin(), Refs.end()),
+ Refs.begin(), Refs.end()});
+ };
+
+ enqueueNode(PrimaryID, UpstreamNode);
+
+ while (!CursorStack.empty()) {
+ UpstreamCursor &Cur = CursorStack.back();
+ if (Cur.RefI == Cur.RefE) {
+ // Copy the node data into the primary store.
+ // FIXME: Use hard-link or cloning if the file-system supports it and data
+ // is stored into a separate file.
+
+ // The bottom of \p PrimaryNodesStack contains the primary ID for the
+ // current node plus the list of imported referenced IDs.
+ assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1);
+ ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1);
+ auto PrimaryRefs = ArrayRef(PrimaryNodesStack)
+ .slice(PrimaryNodesStack.size() - Cur.RefsCount);
+ auto Data = UpstreamDB->getObjectData(Cur.Node);
+ if (Error E = store(PrimaryID, PrimaryRefs, Data))
+ return E;
+ // Remove the current node and its IDs from the stack.
+ PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount);
+ CursorStack.pop_back();
+ continue;
+ }
+
+ ObjectID UpstreamID = *(Cur.RefI++);
+ auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID));
+ if (LLVM_UNLIKELY(!PrimaryID))
+ return PrimaryID.takeError();
+ if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) {
+ // This \p ObjectID already exists in the primary. Either it was imported
+ // via \p importFullTree or the client created it, in which case the
+ // client takes responsibility for how it was formed.
+ enqueueNode(*PrimaryID, std::nullopt);
+ continue;
+ }
+ Expected<std::optional<ObjectHandle>> UpstreamNode =
+ UpstreamDB->load(UpstreamID);
+ if (!UpstreamNode)
+ return UpstreamNode.takeError();
+ enqueueNode(*PrimaryID, *UpstreamNode);
+ }
+
+ assert(PrimaryNodesStack.size() == 1);
+ assert(PrimaryNodesStack.front() == PrimaryID);
+ return Error::success();
+}
+
+Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
+ ObjectHandle UpstreamNode) {
+  // Copies only a single node; it doesn't copy the referenced nodes.
+
+ // Copy the node data into the primary store.
+ // FIXME: Use hard-link or cloning if the file-system supports it and data is
+ // stored into a separate file.
+
+ auto Data = UpstreamDB->getObjectData(UpstreamNode);
+ auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
+ SmallVector<ObjectID, 64> Refs;
+ Refs.reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end()));
+ for (ObjectID UpstreamRef : UpstreamRefs) {
+ auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
+ if (LLVM_UNLIKELY(!Ref))
+ return Ref.takeError();
+ Refs.push_back(*Ref);
+ }
+
+ return store(PrimaryID, Refs, Data);
+}
+
+Expected<std::optional<ObjectHandle>>
+OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
+ assert(UpstreamDB);
+
+ auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
+ if (LLVM_UNLIKELY(!UpstreamID))
+ return UpstreamID.takeError();
+
+ Expected<std::optional<ObjectHandle>> UpstreamNode =
+ UpstreamDB->load(*UpstreamID);
+ if (!UpstreamNode)
+ return UpstreamNode.takeError();
+ if (!*UpstreamNode)
+ return std::nullopt;
+
+ if (Error E = FIPolicy == FaultInPolicy::SingleNode
+ ? importSingleNode(PrimaryID, **UpstreamNode)
+ : importFullTree(PrimaryID, **UpstreamNode))
+ return std::move(E);
+ return load(PrimaryID);
+}
diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
new file mode 100644
index 0000000..2186071
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
@@ -0,0 +1,113 @@
+//===- OnDiskKeyValueDB.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements OnDiskKeyValueDB, an on-disk key-value database.
+///
+/// The key-value database file is named `actions.<version>` inside the CAS
+/// directory. The database stores a mapping between a fixed-size key and a
+/// fixed-size value, where the sizes of the key and the value are configured
+/// when opening the database.
+///
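+/// Example usage (a minimal sketch; the hash name and key/value sizes are
+/// illustrative, and error handling is elided):
+/// \code
+///   auto DB = OnDiskKeyValueDB::open(CASPath, "blake3", /*KeySize=*/32,
+///                                    "objectid", /*ValueSize=*/8);
+///   std::array<uint8_t, 32> Key{};  // hash of the cached action
+///   std::array<char, 8> Value{};    // fixed-size result to associate
+///   if (Error E = (*DB)->put(Key, Value).takeError())
+///     return E;
+///   auto Result = (*DB)->get(Key);  // std::nullopt if the key is absent
+/// \endcode
+///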
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "OnDiskCommon.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+static constexpr StringLiteral ActionCacheFile = "actions.";
+
+Expected<ArrayRef<char>> OnDiskKeyValueDB::put(ArrayRef<uint8_t> Key,
+ ArrayRef<char> Value) {
+ if (LLVM_UNLIKELY(Value.size() != ValueSize))
+ return createStringError(errc::invalid_argument,
+ "expected value size of " + itostr(ValueSize) +
+ ", got: " + itostr(Value.size()));
+ assert(Value.size() == ValueSize);
+ auto ActionP = Cache.insertLazy(
+ Key, [&](FileOffset TentativeOffset,
+ OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
+ assert(TentativeValue.Data.size() == ValueSize);
+ llvm::copy(Value, TentativeValue.Data.data());
+ });
+ if (LLVM_UNLIKELY(!ActionP))
+ return ActionP.takeError();
+ return (*ActionP)->Data;
+}
+
+Expected<std::optional<ArrayRef<char>>>
+OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {
+ // Check the result cache.
+ OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key);
+ if (!ActionP)
+ return std::nullopt;
+ assert(isAddrAligned(Align(8), ActionP->Data.data()));
+ return ActionP->Data;
+}
+
+Expected<std::unique_ptr<OnDiskKeyValueDB>>
+OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
+ StringRef ValueName, size_t ValueSize) {
+ if (std::error_code EC = sys::fs::create_directories(Path))
+ return createFileError(Path, EC);
+
+ SmallString<256> CachePath(Path);
+ sys::path::append(CachePath, ActionCacheFile + CASFormatVersion);
+ constexpr uint64_t MB = 1024ull * 1024ull;
+ constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
+
+ uint64_t MaxFileSize = GB;
+ auto CustomSize = getOverriddenMaxMappingSize();
+ if (!CustomSize)
+ return CustomSize.takeError();
+ if (*CustomSize)
+ MaxFileSize = **CustomSize;
+
+ std::optional<OnDiskTrieRawHashMap> ActionCache;
+ if (Error E = OnDiskTrieRawHashMap::create(
+ CachePath,
+ "llvm.actioncache[" + HashName + "->" + ValueName + "]",
+ KeySize * 8,
+ /*DataSize=*/ValueSize, MaxFileSize, /*MinFileSize=*/MB)
+ .moveInto(ActionCache))
+ return std::move(E);
+
+ return std::unique_ptr<OnDiskKeyValueDB>(
+ new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache)));
+}
+
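+// Example check callback for validate() (a sketch; assumes CheckValueT is
+// callable as CheckValue(Offset, Data), matching the call site in this
+// function):
+// \code
+//   Error E = KVDB->validate([](FileOffset, ArrayRef<char> Val) -> Error {
+//     // E.g. reject values that are all zero.
+//     return llvm::all_of(Val, [](char C) { return C == 0; })
+//                ? createStringError(errc::illegal_byte_sequence,
+//                                    "unexpected all-zero value")
+//                : Error::success();
+//   });
+// \endcode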
+Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const {
+ return Cache.validate(
+ [&](FileOffset Offset,
+ OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error {
+ auto formatError = [&](Twine Msg) {
+ return createStringError(
+ llvm::errc::illegal_byte_sequence,
+ "bad cache value at 0x" +
+ utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
+ Msg.str());
+ };
+
+ if (Record.Data.size() != ValueSize)
+ return formatError("wrong cache value size");
+ if (!isAddrAligned(Align(8), Record.Data.data()))
+ return formatError("wrong cache value alignment");
+ if (CheckValue)
+ return CheckValue(Offset, Record.Data);
+ return Error::success();
+ });
+}
diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index d988a2a..e37f784 100644
--- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MIRFSDiscriminatorOptions.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
@@ -35,13 +36,10 @@ using namespace sampleprofutil;
// TODO(xur): Remove this option and related code once we make true as the
// default.
-namespace llvm {
-cl::opt<bool> ImprovedFSDiscriminator(
+cl::opt<bool> llvm::ImprovedFSDiscriminator(
"improved-fs-discriminator", cl::Hidden, cl::init(false),
cl::desc("New FS discriminators encoding (incompatible with the original "
"encoding)"));
-} // namespace llvm
-
char MIRAddFSDiscriminators::ID = 0;
INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE,
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 9bba50e8..d44f577 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MIRFSDiscriminatorOptions.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -62,9 +63,6 @@ static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
cl::init(false),
cl::desc("View BFI after MIR loader"));
-namespace llvm {
-extern cl::opt<bool> ImprovedFSDiscriminator;
-}
char MIRProfileLoaderPass::ID = 0;
INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
index 8052773..8637b55 100644
--- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
@@ -2427,11 +2427,13 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
uint64_t OrigStmtSeq = StmtSeq.get();
// 1. Get the original row index from the stmt list offset.
auto OrigRowIter = SeqOffToOrigRow.find(OrigStmtSeq);
+ const uint64_t InvalidOffset =
+ Unit.getOrigUnit().getFormParams().getDwarfMaxOffset();
// Check whether we have an output sequence for the StmtSeq offset.
// Some sequences are discarded by the DWARFLinker if they are invalid
// (empty).
if (OrigRowIter == SeqOffToOrigRow.end()) {
- StmtSeq.set(UINT64_MAX);
+ StmtSeq.set(InvalidOffset);
continue;
}
size_t OrigRowIndex = OrigRowIter->second;
@@ -2441,7 +2443,7 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
if (NewRowIter == OrigRowToNewRow.end()) {
// If the original row index is not found in the map, update the
// stmt_sequence attribute to the 'invalid offset' magic value.
- StmtSeq.set(UINT64_MAX);
+ StmtSeq.set(InvalidOffset);
continue;
}
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index fa39603..a326a01 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -320,12 +320,16 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
// Attempt to retrieve DW_AT_LLVM_stmt_sequence if present.
std::optional<uint64_t> StmtSeqOffset;
if (auto StmtSeqAttr = Die.find(llvm::dwarf::DW_AT_LLVM_stmt_sequence)) {
- // The `DW_AT_LLVM_stmt_sequence` attribute might be set to `UINT64_MAX`
- // when it refers to an empty line sequence. In such cases, the DWARF linker
- // will exclude the empty sequence from the final output and assign
- // `UINT64_MAX` to the `DW_AT_LLVM_stmt_sequence` attribute.
- uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, UINT64_MAX);
- if (StmtSeqVal != UINT64_MAX)
+ // The `DW_AT_LLVM_stmt_sequence` attribute might be set to an invalid
+ // sentinel value when it refers to an empty line sequence. In such cases,
+ // the DWARF linker will exclude the empty sequence from the final output
+ // and assign the sentinel value to the `DW_AT_LLVM_stmt_sequence`
+ // attribute. The sentinel value is UINT32_MAX for DWARF32 and UINT64_MAX
+ // for DWARF64.
+ const uint64_t InvalidOffset =
+ Die.getDwarfUnit()->getFormParams().getDwarfMaxOffset();
+ uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, InvalidOffset);
+ if (StmtSeqVal != InvalidOffset)
StmtSeqOffset = StmtSeqVal;
}
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index f47b7ec..8d413a3 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -1173,39 +1173,7 @@ void JITDylib::dump(raw_ostream &OS) {
<< " pending queries: { ";
for (const auto &Q : KV.second.pendingQueries())
OS << Q.get() << " (" << Q->getRequiredState() << ") ";
- OS << "}\n Defining EDU: ";
- if (KV.second.DefiningEDU) {
- OS << KV.second.DefiningEDU.get() << " { ";
- for (auto &[Name, Flags] : KV.second.DefiningEDU->Symbols)
- OS << Name << " ";
- OS << "}\n";
- OS << " Dependencies:\n";
- if (!KV.second.DefiningEDU->Dependencies.empty()) {
- for (auto &[DepJD, Deps] : KV.second.DefiningEDU->Dependencies) {
- OS << " " << DepJD->getName() << ": [ ";
- for (auto &Dep : Deps)
- OS << Dep << " ";
- OS << "]\n";
- }
- } else
- OS << " none\n";
- } else
- OS << "none\n";
- OS << " Dependant EDUs:\n";
- if (!KV.second.DependantEDUs.empty()) {
- for (auto &DependantEDU : KV.second.DependantEDUs) {
- OS << " " << DependantEDU << ": "
- << DependantEDU->JD->getName() << " { ";
- for (auto &[Name, Flags] : DependantEDU->Symbols)
- OS << Name << " ";
- OS << "}\n";
- }
- } else
- OS << " none\n";
- assert((Symbols[KV.first].getState() != SymbolState::Ready ||
- (KV.second.pendingQueries().empty() && !KV.second.DefiningEDU &&
- !KV.second.DependantEDUs.empty())) &&
- "Stale materializing info entry");
+ OS << "}\n";
}
});
}
@@ -1967,9 +1935,6 @@ bool ExecutionSession::verifySessionState(Twine Phase) {
return runSessionLocked([&]() {
bool AllOk = true;
- // We'll collect these and verify them later to avoid redundant checks.
- DenseSet<JITDylib::EmissionDepUnit *> EDUsToCheck;
-
for (auto &JD : JDs) {
auto LogFailure = [&]() -> raw_fd_ostream & {
@@ -2063,86 +2028,6 @@ bool ExecutionSession::verifySessionState(Twine Phase) {
<< " has stale or misordered queries.\n";
}
}
-
- // If there's a DefiningEDU then check that...
- // 1. The JD matches.
- // 2. The symbol is in the EDU's Symbols map.
- // 3. The symbol table entry is in the Emitted state.
- if (MII.DefiningEDU) {
-
- EDUsToCheck.insert(MII.DefiningEDU.get());
-
- if (MII.DefiningEDU->JD != JD.get()) {
- LogFailure() << "symbol " << Sym
- << " has DefiningEDU with incorrect JD"
- << (llvm::is_contained(JDs, MII.DefiningEDU->JD)
- ? " (JD not currently in ExecutionSession"
- : "")
- << "\n";
- }
-
- if (SymItr->second.getState() != SymbolState::Emitted) {
- LogFailure()
- << "symbol " << Sym
- << " has DefiningEDU, but is not in Emitted state.\n";
- }
- }
-
- // Check that JDs for any DependantEDUs are also in the session --
- // that guarantees that we'll also visit them during this loop.
- for (auto &DepEDU : MII.DependantEDUs) {
- if (!llvm::is_contained(JDs, DepEDU->JD)) {
- LogFailure() << "symbol " << Sym << " has DependantEDU "
- << (void *)DepEDU << " with JD (" << DepEDU->JD
- << ") that isn't in ExecutionSession.\n";
- }
- }
- }
- }
- }
-
- // Check EDUs.
- for (auto *EDU : EDUsToCheck) {
- assert(EDU->JD->State == JITDylib::Open && "EDU->JD is not Open");
-
- auto LogFailure = [&]() -> raw_fd_ostream & {
- AllOk = false;
- auto &Stream = errs();
- Stream << "In EDU defining " << EDU->JD->getName() << ": { ";
- for (auto &[Sym, Flags] : EDU->Symbols)
- Stream << Sym << " ";
- Stream << "}, ";
- return Stream;
- };
-
- if (EDU->Symbols.empty())
- LogFailure() << "no symbols defined.\n";
- else {
- for (auto &[Sym, Flags] : EDU->Symbols) {
- if (!Sym)
- LogFailure() << "null symbol defined.\n";
- else {
- if (!EDU->JD->Symbols.count(SymbolStringPtr(Sym))) {
- LogFailure() << "symbol " << Sym
- << " isn't present in JD's symbol table.\n";
- }
- }
- }
- }
-
- for (auto &[DepJD, Symbols] : EDU->Dependencies) {
- if (!llvm::is_contained(JDs, DepJD)) {
- LogFailure() << "dependant symbols listed for JD that isn't in "
- "ExecutionSession.\n";
- } else {
- for (auto &DepSym : Symbols) {
- if (!DepJD->Symbols.count(SymbolStringPtr(DepSym))) {
- LogFailure()
- << "dependant symbol " << DepSym
- << " does not appear in symbol table for dependant JD "
- << DepJD->getName() << ".\n";
- }
- }
}
}
}
@@ -2917,359 +2802,64 @@ Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR,
return MR.JD.resolve(MR, Symbols);
}
-template <typename HandleNewDepFn>
-void ExecutionSession::propagateExtraEmitDeps(
- std::deque<JITDylib::EmissionDepUnit *> Worklist, EDUInfosMap &EDUInfos,
- HandleNewDepFn HandleNewDep) {
-
- // Iterate to a fixed-point to propagate extra-emit dependencies through the
- // EDU graph.
- while (!Worklist.empty()) {
- auto &EDU = *Worklist.front();
- Worklist.pop_front();
-
- assert(EDUInfos.count(&EDU) && "No info entry for EDU");
- auto &EDUInfo = EDUInfos[&EDU];
-
- // Propagate new dependencies to users.
- for (auto *UserEDU : EDUInfo.IntraEmitUsers) {
-
- // UserEDUInfo only present if UserEDU has its own users.
- JITDylib::EmissionDepUnitInfo *UserEDUInfo = nullptr;
- {
- auto UserEDUInfoItr = EDUInfos.find(UserEDU);
- if (UserEDUInfoItr != EDUInfos.end())
- UserEDUInfo = &UserEDUInfoItr->second;
- }
-
- for (auto &[DepJD, Deps] : EDUInfo.NewDeps) {
- auto &UserEDUDepsForJD = UserEDU->Dependencies[DepJD];
- DenseSet<NonOwningSymbolStringPtr> *UserEDUNewDepsForJD = nullptr;
- for (auto Dep : Deps) {
- if (UserEDUDepsForJD.insert(Dep).second) {
- HandleNewDep(*UserEDU, *DepJD, Dep);
- if (UserEDUInfo) {
- if (!UserEDUNewDepsForJD) {
- // If UserEDU has no new deps then it's not in the worklist
- // yet, so add it.
- if (UserEDUInfo->NewDeps.empty())
- Worklist.push_back(UserEDU);
- UserEDUNewDepsForJD = &UserEDUInfo->NewDeps[DepJD];
- }
- // Add (DepJD, Dep) to NewDeps.
- UserEDUNewDepsForJD->insert(Dep);
- }
- }
+WaitingOnGraph::ExternalState
+ExecutionSession::IL_getSymbolState(JITDylib *JD,
+ NonOwningSymbolStringPtr Name) {
+ if (JD->State != JITDylib::Open)
+ return WaitingOnGraph::ExternalState::Failed;
+
+ auto I = JD->Symbols.find_as(Name);
+
+ // FIXME: Can we eliminate this possibility if we support query binding?
+ if (I == JD->Symbols.end())
+ return WaitingOnGraph::ExternalState::Failed;
+
+ if (I->second.getFlags().hasError())
+ return WaitingOnGraph::ExternalState::Failed;
+
+ if (I->second.getState() == SymbolState::Ready)
+ return WaitingOnGraph::ExternalState::Ready;
+
+ return WaitingOnGraph::ExternalState::None;
+}
+
+template <typename UpdateSymbolFn, typename UpdateQueryFn>
+void ExecutionSession::IL_collectQueries(
+ JITDylib::AsynchronousSymbolQuerySet &Qs,
+ WaitingOnGraph::ContainerElementsMap &QualifiedSymbols,
+ UpdateSymbolFn &&UpdateSymbol, UpdateQueryFn &&UpdateQuery) {
+
+ for (auto &[JD, Symbols] : QualifiedSymbols) {
+ // IL_emit and JITDylib removal are synchronized by the session lock.
+    // Since JITDylib removal also removes that dylib's nodes from the
+    // WaitingOnGraph, we can assert that every JITDylib still referenced by
+    // the WaitingOnGraph remains open.
+ assert(JD->State == JITDylib::Open &&
+ "WaitingOnGraph includes definition in defunct JITDylib");
+ for (auto &Symbol : Symbols) {
+ // Update symbol table.
+ auto I = JD->Symbols.find_as(Symbol);
+ assert(I != JD->Symbols.end() &&
+ "Failed Symbol missing from JD symbol table");
+ auto &Entry = I->second;
+ UpdateSymbol(Entry);
+
+ // Collect queries.
+ auto J = JD->MaterializingInfos.find_as(Symbol);
+ if (J != JD->MaterializingInfos.end()) {
+ for (auto &Q : J->second.takeAllPendingQueries()) {
+ UpdateQuery(*Q, *JD, Symbol, Entry);
+ Qs.insert(std::move(Q));
}
+ JD->MaterializingInfos.erase(J);
}
}
-
- EDUInfo.NewDeps.clear();
- }
-}
-
-// Note: This method modifies the emitted set.
-ExecutionSession::EDUInfosMap ExecutionSession::simplifyDepGroups(
- MaterializationResponsibility &MR,
- ArrayRef<SymbolDependenceGroup> EmittedDeps) {
-
- auto &TargetJD = MR.getTargetJITDylib();
-
- // 1. Build initial EmissionDepUnit -> EmissionDepUnitInfo and
- // Symbol -> EmissionDepUnit mappings.
- DenseMap<JITDylib::EmissionDepUnit *, JITDylib::EmissionDepUnitInfo> EDUInfos;
- EDUInfos.reserve(EmittedDeps.size());
- DenseMap<NonOwningSymbolStringPtr, JITDylib::EmissionDepUnit *> EDUForSymbol;
- for (auto &DG : EmittedDeps) {
- assert(!DG.Symbols.empty() && "DepGroup does not cover any symbols");
-
- // Skip empty EDUs.
- if (DG.Dependencies.empty())
- continue;
-
- auto TmpEDU = std::make_shared<JITDylib::EmissionDepUnit>(TargetJD);
- auto &EDUInfo = EDUInfos[TmpEDU.get()];
- EDUInfo.EDU = std::move(TmpEDU);
- for (const auto &Symbol : DG.Symbols) {
- NonOwningSymbolStringPtr NonOwningSymbol(Symbol);
- assert(!EDUForSymbol.count(NonOwningSymbol) &&
- "Symbol should not appear in more than one SymbolDependenceGroup");
- assert(MR.getSymbols().count(Symbol) &&
- "Symbol in DepGroups not in the emitted set");
- auto NewlyEmittedItr = MR.getSymbols().find(Symbol);
- EDUInfo.EDU->Symbols[NonOwningSymbol] = NewlyEmittedItr->second;
- EDUForSymbol[NonOwningSymbol] = EDUInfo.EDU.get();
- }
- }
-
- // 2. Build a "residual" EDU to cover all symbols that have no dependencies.
- {
- DenseMap<NonOwningSymbolStringPtr, JITSymbolFlags> ResidualSymbolFlags;
- for (auto &[Sym, Flags] : MR.getSymbols()) {
- if (!EDUForSymbol.count(NonOwningSymbolStringPtr(Sym)))
- ResidualSymbolFlags[NonOwningSymbolStringPtr(Sym)] = Flags;
- }
- if (!ResidualSymbolFlags.empty()) {
- auto ResidualEDU = std::make_shared<JITDylib::EmissionDepUnit>(TargetJD);
- ResidualEDU->Symbols = std::move(ResidualSymbolFlags);
- auto &ResidualEDUInfo = EDUInfos[ResidualEDU.get()];
- ResidualEDUInfo.EDU = std::move(ResidualEDU);
-
- // If the residual EDU is the only one then bail out early.
- if (EDUInfos.size() == 1)
- return EDUInfos;
-
- // Otherwise add the residual EDU to the EDUForSymbol map.
- for (auto &[Sym, Flags] : ResidualEDUInfo.EDU->Symbols)
- EDUForSymbol[Sym] = ResidualEDUInfo.EDU.get();
- }
- }
-
-#ifndef NDEBUG
- assert(EDUForSymbol.size() == MR.getSymbols().size() &&
- "MR symbols not fully covered by EDUs?");
- for (auto &[Sym, Flags] : MR.getSymbols()) {
- assert(EDUForSymbol.count(NonOwningSymbolStringPtr(Sym)) &&
- "Sym in MR not covered by EDU");
- }
-#endif // NDEBUG
-
- // 3. Use the DepGroups array to build a graph of dependencies between
- // EmissionDepUnits in this finalization. We want to remove these
- // intra-finalization uses, propagating dependencies on symbols outside
- // this finalization. Add EDUs to the worklist.
- for (auto &DG : EmittedDeps) {
-
- // Skip SymbolDependenceGroups with no dependencies.
- if (DG.Dependencies.empty())
- continue;
-
- assert(EDUForSymbol.count(NonOwningSymbolStringPtr(*DG.Symbols.begin())) &&
- "No EDU for DG");
- auto &EDU =
- *EDUForSymbol.find(NonOwningSymbolStringPtr(*DG.Symbols.begin()))
- ->second;
-
- for (auto &[DepJD, Deps] : DG.Dependencies) {
- DenseSet<NonOwningSymbolStringPtr> NewDepsForJD;
-
- assert(!Deps.empty() && "Dependence set for DepJD is empty");
-
- if (DepJD != &TargetJD) {
- // DepJD is some other JITDylib.There can't be any intra-finalization
- // edges here, so just skip.
- for (auto &Dep : Deps)
- NewDepsForJD.insert(NonOwningSymbolStringPtr(Dep));
- } else {
- // DepJD is the Target JITDylib. Check for intra-finaliztaion edges,
- // skipping any and recording the intra-finalization use instead.
- for (auto &Dep : Deps) {
- NonOwningSymbolStringPtr NonOwningDep(Dep);
- auto I = EDUForSymbol.find(NonOwningDep);
- if (I == EDUForSymbol.end()) {
- if (!MR.getSymbols().count(Dep))
- NewDepsForJD.insert(NonOwningDep);
- continue;
- }
-
- if (I->second != &EDU)
- EDUInfos[I->second].IntraEmitUsers.insert(&EDU);
- }
- }
-
- if (!NewDepsForJD.empty())
- EDU.Dependencies[DepJD] = std::move(NewDepsForJD);
- }
- }
-
- // 4. Build the worklist.
- std::deque<JITDylib::EmissionDepUnit *> Worklist;
- for (auto &[EDU, EDUInfo] : EDUInfos) {
- // If this EDU has extra-finalization dependencies and intra-finalization
- // users then add it to the worklist.
- if (!EDU->Dependencies.empty()) {
- auto I = EDUInfos.find(EDU);
- if (I != EDUInfos.end()) {
- auto &EDUInfo = I->second;
- if (!EDUInfo.IntraEmitUsers.empty()) {
- EDUInfo.NewDeps = EDU->Dependencies;
- Worklist.push_back(EDU);
- }
- }
- }
- }
-
- // 4. Propagate dependencies through the EDU graph.
- propagateExtraEmitDeps(
- Worklist, EDUInfos,
- [](JITDylib::EmissionDepUnit &, JITDylib &, NonOwningSymbolStringPtr) {});
-
- return EDUInfos;
-}
-
-void ExecutionSession::IL_makeEDUReady(
- std::shared_ptr<JITDylib::EmissionDepUnit> EDU,
- JITDylib::AsynchronousSymbolQuerySet &Queries) {
-
- // The symbols for this EDU are ready.
- auto &JD = *EDU->JD;
-
- for (auto &[Sym, Flags] : EDU->Symbols) {
- assert(JD.Symbols.count(SymbolStringPtr(Sym)) &&
- "JD does not have an entry for Sym");
- auto &Entry = JD.Symbols[SymbolStringPtr(Sym)];
-
- assert(((Entry.getFlags().hasMaterializationSideEffectsOnly() &&
- Entry.getState() == SymbolState::Materializing) ||
- Entry.getState() == SymbolState::Resolved ||
- Entry.getState() == SymbolState::Emitted) &&
- "Emitting from state other than Resolved");
-
- Entry.setState(SymbolState::Ready);
-
- auto MII = JD.MaterializingInfos.find(SymbolStringPtr(Sym));
-
- // Check for pending queries.
- if (MII == JD.MaterializingInfos.end())
- continue;
- auto &MI = MII->second;
-
- for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) {
- Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol());
- if (Q->isComplete())
- Queries.insert(Q);
- Q->removeQueryDependence(JD, SymbolStringPtr(Sym));
- }
-
- JD.MaterializingInfos.erase(MII);
- }
-
- JD.shrinkMaterializationInfoMemory();
-}
-
-void ExecutionSession::IL_makeEDUEmitted(
- std::shared_ptr<JITDylib::EmissionDepUnit> EDU,
- JITDylib::AsynchronousSymbolQuerySet &Queries) {
-
- // The symbols for this EDU are emitted, but not ready.
- auto &JD = *EDU->JD;
-
- for (auto &[Sym, Flags] : EDU->Symbols) {
- assert(JD.Symbols.count(SymbolStringPtr(Sym)) &&
- "JD does not have an entry for Sym");
- auto &Entry = JD.Symbols[SymbolStringPtr(Sym)];
-
- assert(((Entry.getFlags().hasMaterializationSideEffectsOnly() &&
- Entry.getState() == SymbolState::Materializing) ||
- Entry.getState() == SymbolState::Resolved ||
- Entry.getState() == SymbolState::Emitted) &&
- "Emitting from state other than Resolved");
-
- if (Entry.getState() == SymbolState::Emitted) {
- // This was already emitted, so we can skip the rest of this loop.
-#ifndef NDEBUG
- for (auto &[Sym, Flags] : EDU->Symbols) {
- assert(JD.Symbols.count(SymbolStringPtr(Sym)) &&
- "JD does not have an entry for Sym");
- auto &Entry = JD.Symbols[SymbolStringPtr(Sym)];
- assert(Entry.getState() == SymbolState::Emitted &&
- "Symbols for EDU in inconsistent state");
- assert(JD.MaterializingInfos.count(SymbolStringPtr(Sym)) &&
- "Emitted symbol has no MI");
- auto MI = JD.MaterializingInfos[SymbolStringPtr(Sym)];
- assert(MI.takeQueriesMeeting(SymbolState::Emitted).empty() &&
- "Already-emitted symbol has waiting-on-emitted queries");
- }
-#endif // NDEBUG
- break;
- }
-
- Entry.setState(SymbolState::Emitted);
- auto &MI = JD.MaterializingInfos[SymbolStringPtr(Sym)];
- MI.DefiningEDU = EDU;
-
- for (auto &Q : MI.takeQueriesMeeting(SymbolState::Emitted)) {
- Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol());
- if (Q->isComplete())
- Queries.insert(Q);
- }
}
-
- for (auto &[DepJD, Deps] : EDU->Dependencies) {
- for (auto &Dep : Deps)
- DepJD->MaterializingInfos[SymbolStringPtr(Dep)].DependantEDUs.insert(
- EDU.get());
- }
-}
-
-/// Removes the given dependence from EDU. If EDU's dependence set becomes
-/// empty then this function adds an entry for it to the EDUInfos map.
-/// Returns true if a new EDUInfosMap entry is added.
-bool ExecutionSession::IL_removeEDUDependence(JITDylib::EmissionDepUnit &EDU,
- JITDylib &DepJD,
- NonOwningSymbolStringPtr DepSym,
- EDUInfosMap &EDUInfos) {
- assert(EDU.Dependencies.count(&DepJD) &&
- "JD does not appear in Dependencies of DependantEDU");
- assert(EDU.Dependencies[&DepJD].count(DepSym) &&
- "Symbol does not appear in Dependencies of DependantEDU");
- auto &JDDeps = EDU.Dependencies[&DepJD];
- JDDeps.erase(DepSym);
- if (JDDeps.empty()) {
- EDU.Dependencies.erase(&DepJD);
- if (EDU.Dependencies.empty()) {
- // If the dependencies set has become empty then EDU _may_ be ready
- // (we won't know for sure until we've propagated the extra-emit deps).
- // Create an EDUInfo for it (if it doesn't have one already) so that
- // it'll be visited after propagation.
- auto &DepEDUInfo = EDUInfos[&EDU];
- if (!DepEDUInfo.EDU) {
- assert(EDU.JD->Symbols.count(
- SymbolStringPtr(EDU.Symbols.begin()->first)) &&
- "Missing symbol entry for first symbol in EDU");
- auto DepEDUFirstMI = EDU.JD->MaterializingInfos.find(
- SymbolStringPtr(EDU.Symbols.begin()->first));
- assert(DepEDUFirstMI != EDU.JD->MaterializingInfos.end() &&
- "Missing MI for first symbol in DependantEDU");
- DepEDUInfo.EDU = DepEDUFirstMI->second.DefiningEDU;
- return true;
- }
- }
- }
- return false;
}
-Error ExecutionSession::makeJDClosedError(JITDylib::EmissionDepUnit &EDU,
- JITDylib &ClosedJD) {
- SymbolNameSet FailedSymbols;
- for (auto &[Sym, Flags] : EDU.Symbols)
- FailedSymbols.insert(SymbolStringPtr(Sym));
- SymbolDependenceMap BadDeps;
- for (auto &Dep : EDU.Dependencies[&ClosedJD])
- BadDeps[&ClosedJD].insert(SymbolStringPtr(Dep));
- return make_error<UnsatisfiedSymbolDependencies>(
- ClosedJD.getExecutionSession().getSymbolStringPool(), EDU.JD,
- std::move(FailedSymbols), std::move(BadDeps),
- ClosedJD.getName() + " is closed");
-}
-
-Error ExecutionSession::makeUnsatisfiedDepsError(JITDylib::EmissionDepUnit &EDU,
- JITDylib &BadJD,
- SymbolNameSet BadDeps) {
- SymbolNameSet FailedSymbols;
- for (auto &[Sym, Flags] : EDU.Symbols)
- FailedSymbols.insert(SymbolStringPtr(Sym));
- SymbolDependenceMap BadDepsMap;
- BadDepsMap[&BadJD] = std::move(BadDeps);
- return make_error<UnsatisfiedSymbolDependencies>(
- BadJD.getExecutionSession().getSymbolStringPool(), &BadJD,
- std::move(FailedSymbols), std::move(BadDepsMap),
- "dependencies removed or in error state");
-}
-
-Expected<JITDylib::AsynchronousSymbolQuerySet>
+Expected<ExecutionSession::EmitQueries>
ExecutionSession::IL_emit(MaterializationResponsibility &MR,
- EDUInfosMap EDUInfos) {
+ WaitingOnGraph::SimplifyResult SR) {
if (MR.RT->isDefunct())
return make_error<ResourceTrackerDefunct>(MR.RT);
@@ -3279,169 +2869,50 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR,
return make_error<StringError>("JITDylib " + TargetJD.getName() +
" is defunct",
inconvertibleErrorCode());
+
#ifdef EXPENSIVE_CHECKS
verifySessionState("entering ExecutionSession::IL_emit");
#endif
- // Walk all EDUs:
- // 1. Verifying that dependencies are available (not removed or in the error
- // state.
- // 2. Removing any dependencies that are already Ready.
- // 3. Lifting any EDUs for Emitted symbols into the EDUInfos map.
- // 4. Finding any dependant EDUs and lifting them into the EDUInfos map.
- std::deque<JITDylib::EmissionDepUnit *> Worklist;
- for (auto &[EDU, _] : EDUInfos)
- Worklist.push_back(EDU);
-
- for (auto *EDU : Worklist) {
- auto *EDUInfo = &EDUInfos[EDU];
-
- SmallVector<JITDylib *> DepJDsToRemove;
- for (auto &[DepJD, Deps] : EDU->Dependencies) {
- if (DepJD->State != JITDylib::Open)
- return makeJDClosedError(*EDU, *DepJD);
-
- SymbolNameSet BadDeps;
- SmallVector<NonOwningSymbolStringPtr> DepsToRemove;
- for (auto &Dep : Deps) {
- auto DepEntryItr = DepJD->Symbols.find(SymbolStringPtr(Dep));
-
- // If this dep has been removed or moved to the error state then add it
- // to the bad deps set. We aggregate these bad deps for more
- // comprehensive error messages.
- if (DepEntryItr == DepJD->Symbols.end() ||
- DepEntryItr->second.getFlags().hasError()) {
- BadDeps.insert(SymbolStringPtr(Dep));
- continue;
- }
-
- // If this dep isn't emitted yet then just add it to the NewDeps set to
- // be propagated.
- auto &DepEntry = DepEntryItr->second;
- if (DepEntry.getState() < SymbolState::Emitted) {
- EDUInfo->NewDeps[DepJD].insert(Dep);
- continue;
- }
-
- // This dep has been emitted, so add it to the list to be removed from
- // EDU.
- DepsToRemove.push_back(Dep);
-
- // If Dep is Ready then there's nothing further to do.
- if (DepEntry.getState() == SymbolState::Ready) {
- assert(!DepJD->MaterializingInfos.count(SymbolStringPtr(Dep)) &&
- "Unexpected MaterializationInfo attached to ready symbol");
- continue;
- }
+ auto ER = G.emit(std::move(SR),
+ [this](JITDylib *JD, NonOwningSymbolStringPtr Name) {
+ return IL_getSymbolState(JD, Name);
+ });
- // If we get here then Dep is Emitted. We need to look up its defining
- // EDU and add this EDU to the defining EDU's list of users (this means
- // creating an EDUInfos entry if the defining EDU doesn't have one
- // already).
- assert(DepJD->MaterializingInfos.count(SymbolStringPtr(Dep)) &&
- "Expected MaterializationInfo for emitted dependency");
- auto &DepMI = DepJD->MaterializingInfos[SymbolStringPtr(Dep)];
- assert(DepMI.DefiningEDU &&
- "Emitted symbol does not have a defining EDU");
- assert(DepMI.DependantEDUs.empty() &&
- "Already-emitted symbol has dependant EDUs?");
- auto &DepEDUInfo = EDUInfos[DepMI.DefiningEDU.get()];
- if (!DepEDUInfo.EDU) {
- // No EDUInfo yet -- build initial entry, and reset the EDUInfo
- // pointer, which we will have invalidated.
- EDUInfo = &EDUInfos[EDU];
- DepEDUInfo.EDU = DepMI.DefiningEDU;
- for (auto &[DepDepJD, DepDeps] : DepEDUInfo.EDU->Dependencies) {
- if (DepDepJD == &TargetJD) {
- for (auto &DepDep : DepDeps)
- if (!MR.getSymbols().count(SymbolStringPtr(DepDep)))
- DepEDUInfo.NewDeps[DepDepJD].insert(DepDep);
- } else
- DepEDUInfo.NewDeps[DepDepJD] = DepDeps;
- }
- }
- DepEDUInfo.IntraEmitUsers.insert(EDU);
- }
-
- // Some dependencies were removed or in an error state -- error out.
- if (!BadDeps.empty())
- return makeUnsatisfiedDepsError(*EDU, *DepJD, std::move(BadDeps));
-
- // Remove the emitted / ready deps from DepJD.
- for (auto &Dep : DepsToRemove)
- Deps.erase(Dep);
-
- // If there are no further deps in DepJD then flag it for removal too.
- if (Deps.empty())
- DepJDsToRemove.push_back(DepJD);
- }
+ EmitQueries EQ;
- // Remove any JDs whose dependence sets have become empty.
- for (auto &DepJD : DepJDsToRemove) {
- assert(EDU->Dependencies.count(DepJD) &&
- "Trying to remove non-existent dep entries");
- EDU->Dependencies.erase(DepJD);
- }
-
- // Now look for users of this EDU.
- for (auto &[Sym, Flags] : EDU->Symbols) {
- assert(TargetJD.Symbols.count(SymbolStringPtr(Sym)) &&
- "Sym not present in symbol table");
- assert((TargetJD.Symbols[SymbolStringPtr(Sym)].getState() ==
- SymbolState::Resolved ||
- TargetJD.Symbols[SymbolStringPtr(Sym)]
- .getFlags()
- .hasMaterializationSideEffectsOnly()) &&
- "Emitting symbol not in the resolved state");
- assert(!TargetJD.Symbols[SymbolStringPtr(Sym)].getFlags().hasError() &&
- "Symbol is already in an error state");
-
- auto MII = TargetJD.MaterializingInfos.find(SymbolStringPtr(Sym));
- if (MII == TargetJD.MaterializingInfos.end() ||
- MII->second.DependantEDUs.empty())
- continue;
-
- for (auto &DependantEDU : MII->second.DependantEDUs) {
- if (IL_removeEDUDependence(*DependantEDU, TargetJD, Sym, EDUInfos))
- EDUInfo = &EDUInfos[EDU];
- EDUInfo->IntraEmitUsers.insert(DependantEDU);
- }
- MII->second.DependantEDUs.clear();
- }
- }
-
- Worklist.clear();
- for (auto &[EDU, EDUInfo] : EDUInfos) {
- if (!EDUInfo.IntraEmitUsers.empty() && !EDU->Dependencies.empty()) {
- if (EDUInfo.NewDeps.empty())
- EDUInfo.NewDeps = EDU->Dependencies;
- Worklist.push_back(EDU);
- }
- }
-
- propagateExtraEmitDeps(
- Worklist, EDUInfos,
- [](JITDylib::EmissionDepUnit &EDU, JITDylib &JD,
- NonOwningSymbolStringPtr Sym) {
- JD.MaterializingInfos[SymbolStringPtr(Sym)].DependantEDUs.insert(&EDU);
- });
+ // Handle failed queries.
+ for (auto &SN : ER.Failed)
+ IL_collectQueries(
+ EQ.Failed, SN->defs(),
+ [](JITDylib::SymbolTableEntry &E) {
+        E.setFlags(E.getFlags() | JITSymbolFlags::HasError);
+ },
+ [&](AsynchronousSymbolQuery &Q, JITDylib &JD,
+ NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) {
+ auto &FS = EQ.FailedSymsForQuery[&Q];
+ if (!FS)
+ FS = std::make_shared<SymbolDependenceMap>();
+ (*FS)[&JD].insert(SymbolStringPtr(Name));
+ });
- JITDylib::AsynchronousSymbolQuerySet CompletedQueries;
+ for (auto &FQ : EQ.Failed)
+ FQ->detach();
- // Extract completed queries and lodge not-yet-ready EDUs in the
- // session.
- for (auto &[EDU, EDUInfo] : EDUInfos) {
- if (EDU->Dependencies.empty())
- IL_makeEDUReady(std::move(EDUInfo.EDU), CompletedQueries);
- else
- IL_makeEDUEmitted(std::move(EDUInfo.EDU), CompletedQueries);
- }
+ for (auto &SN : ER.Ready)
+ IL_collectQueries(
+ EQ.Updated, SN->defs(),
+ [](JITDylib::SymbolTableEntry &E) { E.setState(SymbolState::Ready); },
+ [](AsynchronousSymbolQuery &Q, JITDylib &JD,
+ NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) {
+ Q.notifySymbolMetRequiredState(SymbolStringPtr(Name), E.getSymbol());
+ });
#ifdef EXPENSIVE_CHECKS
verifySessionState("exiting ExecutionSession::IL_emit");
#endif
- return std::move(CompletedQueries);
+ return std::move(EQ);
}
Error ExecutionSession::OL_notifyEmitted(
@@ -3471,40 +2942,127 @@ Error ExecutionSession::OL_notifyEmitted(
}
#endif // NDEBUG
- auto EDUInfos = simplifyDepGroups(MR, DepGroups);
+ std::vector<std::unique_ptr<WaitingOnGraph::SuperNode>> SNs;
+ WaitingOnGraph::ContainerElementsMap Residual;
+ {
+ auto &JDResidual = Residual[&MR.getTargetJITDylib()];
+ for (auto &[Name, Flags] : MR.getSymbols())
+ JDResidual.insert(NonOwningSymbolStringPtr(Name));
+
+ for (auto &SDG : DepGroups) {
+ WaitingOnGraph::ContainerElementsMap Defs;
+ assert(!SDG.Symbols.empty());
+ auto &JDDefs = Defs[&MR.getTargetJITDylib()];
+ for (auto &Def : SDG.Symbols) {
+ JDDefs.insert(NonOwningSymbolStringPtr(Def));
+ JDResidual.erase(NonOwningSymbolStringPtr(Def));
+ }
+ WaitingOnGraph::ContainerElementsMap Deps;
+ if (!SDG.Dependencies.empty()) {
+ for (auto &[JD, Syms] : SDG.Dependencies) {
+ auto &JDDeps = Deps[JD];
+ for (auto &Dep : Syms)
+ JDDeps.insert(NonOwningSymbolStringPtr(Dep));
+ }
+ }
+ SNs.push_back(std::make_unique<WaitingOnGraph::SuperNode>(
+ std::move(Defs), std::move(Deps)));
+ }
+ if (!JDResidual.empty())
+ SNs.push_back(std::make_unique<WaitingOnGraph::SuperNode>(
+ std::move(Residual), WaitingOnGraph::ContainerElementsMap()));
+ }
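+  // For example (illustrative): if MR defines {a, b, c} and DepGroups holds a
+  // single group ({a} depending on (JD2, {x})), this builds one super-node
+  // with defs {a} and deps {(JD2, x)}, plus a residual super-node with defs
+  // {b, c} and no deps.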
+
+ auto SR = WaitingOnGraph::simplify(std::move(SNs));
LLVM_DEBUG({
dbgs() << " Simplified dependencies:\n";
- for (auto &[EDU, EDUInfo] : EDUInfos) {
- dbgs() << " Symbols: { ";
- for (auto &[Sym, Flags] : EDU->Symbols)
- dbgs() << Sym << " ";
- dbgs() << "}, Dependencies: { ";
- for (auto &[DepJD, Deps] : EDU->Dependencies) {
- dbgs() << "(" << DepJD->getName() << ", { ";
- for (auto &Dep : Deps)
- dbgs() << Dep << " ";
- dbgs() << "}) ";
+ for (auto &SN : SR.superNodes()) {
+
+ auto SortedLibs = [](WaitingOnGraph::ContainerElementsMap &C) {
+ std::vector<JITDylib *> JDs;
+ for (auto &[JD, _] : C)
+ JDs.push_back(JD);
+ llvm::sort(JDs, [](const JITDylib *LHS, const JITDylib *RHS) {
+ return LHS->getName() < RHS->getName();
+ });
+ return JDs;
+ };
+
+ auto SortedNames = [](WaitingOnGraph::ElementSet &Elems) {
+ std::vector<NonOwningSymbolStringPtr> Names(Elems.begin(), Elems.end());
+ llvm::sort(Names, [](const NonOwningSymbolStringPtr &LHS,
+ const NonOwningSymbolStringPtr &RHS) {
+ return *LHS < *RHS;
+ });
+ return Names;
+ };
+
+ dbgs() << " Defs: {";
+ for (auto *JD : SortedLibs(SN->defs())) {
+ dbgs() << " (" << JD->getName() << ", [";
+ for (auto &Sym : SortedNames(SN->defs()[JD]))
+ dbgs() << " " << Sym;
+ dbgs() << " ])";
+ }
+ dbgs() << " }, Deps: {";
+ for (auto *JD : SortedLibs(SN->deps())) {
+ dbgs() << " (" << JD->getName() << ", [";
+ for (auto &Sym : SortedNames(SN->deps()[JD]))
+ dbgs() << " " << Sym;
+ dbgs() << " ])";
}
- dbgs() << "}\n";
+ dbgs() << " }\n";
}
});
-
- auto CompletedQueries =
- runSessionLocked([&]() { return IL_emit(MR, EDUInfos); });
+ auto EmitQueries =
+ runSessionLocked([&]() { return IL_emit(MR, std::move(SR)); });
// On error bail out.
- if (!CompletedQueries)
- return CompletedQueries.takeError();
+ if (!EmitQueries)
+ return EmitQueries.takeError();
- MR.SymbolFlags.clear();
+ // Otherwise notify failed queries, and any updated queries that have been
+ // completed.
- // Otherwise notify all the completed queries.
- for (auto &Q : *CompletedQueries) {
- assert(Q->isComplete() && "Q is not complete");
- Q->handleComplete(*this);
+ // FIXME: Get rid of error return from notifyEmitted.
+ SymbolDependenceMap BadDeps;
+ {
+ for (auto &FQ : EmitQueries->Failed) {
+ FQ->detach();
+ assert(EmitQueries->FailedSymsForQuery.count(FQ.get()) &&
+ "Missing failed symbols for query");
+ auto FailedSyms = std::move(EmitQueries->FailedSymsForQuery[FQ.get()]);
+ for (auto &[JD, Syms] : *FailedSyms) {
+ auto &BadDepsForJD = BadDeps[JD];
+ for (auto &Sym : Syms)
+ BadDepsForJD.insert(Sym);
+ }
+ FQ->handleFailed(make_error<FailedToMaterialize>(getSymbolStringPool(),
+ std::move(FailedSyms)));
+ }
+ }
+
+ for (auto &UQ : EmitQueries->Updated)
+ if (UQ->isComplete())
+ UQ->handleComplete(*this);
+
+ // If there are any bad dependencies then return an error.
+ if (!BadDeps.empty()) {
+ SymbolNameSet BadNames;
+ // Note: The name set calculated here is bogus: it includes all symbols in
+ // the MR, not just the ones that failed. We want to remove the error
+ // return path from notifyEmitted anyway, so this is just a brief
+ // placeholder to maintain (roughly) the current error behavior.
+ for (auto &[Name, Flags] : MR.getSymbols())
+ BadNames.insert(Name);
+ MR.SymbolFlags.clear();
+ return make_error<UnsatisfiedSymbolDependencies>(
+ getSymbolStringPool(), &MR.getTargetJITDylib(), std::move(BadNames),
+ std::move(BadDeps), "dependencies removed or in error state");
}
+ MR.SymbolFlags.clear();
return Error::success();
}
@@ -3535,158 +3093,48 @@ ExecutionSession::IL_failSymbols(JITDylib &JD,
#endif
JITDylib::AsynchronousSymbolQuerySet FailedQueries;
- auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
- auto ExtractFailedQueries = [&](JITDylib::MaterializingInfo &MI) {
- JITDylib::AsynchronousSymbolQueryList ToDetach;
- for (auto &Q : MI.pendingQueries()) {
- // Add the query to the list to be failed and detach it.
- FailedQueries.insert(Q);
- ToDetach.push_back(Q);
+ auto Fail = [&](JITDylib *FailJD, NonOwningSymbolStringPtr FailSym) {
+ auto I = FailJD->Symbols.find_as(FailSym);
+ assert(I != FailJD->Symbols.end());
+ I->second.setFlags(I->second.getFlags() | JITSymbolFlags::HasError);
+ auto J = FailJD->MaterializingInfos.find_as(FailSym);
+ if (J != FailJD->MaterializingInfos.end()) {
+ for (auto &Q : J->second.takeAllPendingQueries())
+ FailedQueries.insert(std::move(Q));
+ FailJD->MaterializingInfos.erase(J);
}
- for (auto &Q : ToDetach)
- Q->detach();
- assert(!MI.hasQueriesPending() && "Queries still pending after detach");
};
- for (auto &Name : SymbolsToFail) {
- (*FailedSymbolsMap)[&JD].insert(Name);
-
- // Look up the symbol to fail.
- auto SymI = JD.Symbols.find(Name);
-
- // FIXME: Revisit this. We should be able to assert sequencing between
- // ResourceTracker removal and symbol failure.
- //
- // It's possible that this symbol has already been removed, e.g. if a
- // materialization failure happens concurrently with a ResourceTracker or
- // JITDylib removal. In that case we can safely skip this symbol and
- // continue.
- if (SymI == JD.Symbols.end())
- continue;
- auto &Sym = SymI->second;
-
- // If the symbol is already in the error state then we must have visited
- // it earlier.
- if (Sym.getFlags().hasError()) {
- assert(!JD.MaterializingInfos.count(Name) &&
- "Symbol in error state still has MaterializingInfo");
- continue;
- }
+ auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
- // Move the symbol into the error state.
- Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError);
-
- // FIXME: Come up with a sane mapping of state to
- // presence-of-MaterializingInfo so that we can assert presence / absence
- // here, rather than testing it.
- auto MII = JD.MaterializingInfos.find(Name);
- if (MII == JD.MaterializingInfos.end())
- continue;
-
- auto &MI = MII->second;
-
- // Collect queries to be failed for this MII.
- ExtractFailedQueries(MI);
-
- if (MI.DefiningEDU) {
- // If there is a DefiningEDU for this symbol then remove this
- // symbol from it.
- assert(MI.DependantEDUs.empty() &&
- "Symbol with DefiningEDU should not have DependantEDUs");
- assert(Sym.getState() >= SymbolState::Emitted &&
- "Symbol has EDU, should have been emitted");
- assert(MI.DefiningEDU->Symbols.count(NonOwningSymbolStringPtr(Name)) &&
- "Symbol does not appear in its DefiningEDU");
- MI.DefiningEDU->Symbols.erase(NonOwningSymbolStringPtr(Name));
-
- // Remove this EDU from the dependants lists of its dependencies.
- for (auto &[DepJD, DepSyms] : MI.DefiningEDU->Dependencies) {
- for (auto DepSym : DepSyms) {
- assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) &&
- "DepSym not in DepJD");
- assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) &&
- "DepSym has not MaterializingInfo");
- auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)];
- assert(SymMI.DependantEDUs.count(MI.DefiningEDU.get()) &&
- "DefiningEDU missing from DependantEDUs list of dependency");
- SymMI.DependantEDUs.erase(MI.DefiningEDU.get());
- }
- }
+ {
+ auto &FailedSymsForJD = (*FailedSymbolsMap)[&JD];
+ for (auto &Sym : SymbolsToFail) {
+ FailedSymsForJD.insert(Sym);
+ Fail(&JD, NonOwningSymbolStringPtr(Sym));
+ }
+ }
- MI.DefiningEDU = nullptr;
- } else {
- // Otherwise if there are any EDUs waiting on this symbol then move
- // those symbols to the error state too, and deregister them from the
- // symbols that they depend on.
- // Note: We use a copy of DependantEDUs here since we'll be removing
- // from the original set as we go.
- for (auto &DependantEDU : MI.DependantEDUs) {
-
- // Remove DependantEDU from all of its users DependantEDUs lists.
- for (auto &[DepJD, DepSyms] : DependantEDU->Dependencies) {
- for (auto DepSym : DepSyms) {
- // Skip self-reference to avoid invalidating the MI.DependantEDUs
- // map. We'll clear this later.
- if (DepJD == &JD && DepSym == Name)
- continue;
- assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) &&
- "DepSym not in DepJD?");
- assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) &&
- "DependantEDU not registered with symbol it depends on");
- auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)];
- assert(SymMI.DependantEDUs.count(DependantEDU) &&
- "DependantEDU missing from DependantEDUs list");
- SymMI.DependantEDUs.erase(DependantEDU);
- }
- }
+ WaitingOnGraph::ContainerElementsMap ToFail;
+ auto &JDToFail = ToFail[&JD];
+ for (auto &Sym : SymbolsToFail)
+ JDToFail.insert(NonOwningSymbolStringPtr(Sym));
- // Move any symbols defined by DependantEDU into the error state and
- // fail any queries waiting on them.
- auto &DepJD = *DependantEDU->JD;
- auto DepEDUSymbols = std::move(DependantEDU->Symbols);
- for (auto &[DepName, Flags] : DepEDUSymbols) {
- auto DepSymItr = DepJD.Symbols.find(SymbolStringPtr(DepName));
- assert(DepSymItr != DepJD.Symbols.end() &&
- "Symbol not present in table");
- auto &DepSym = DepSymItr->second;
-
- assert(DepSym.getState() >= SymbolState::Emitted &&
- "Symbol has EDU, should have been emitted");
- assert(!DepSym.getFlags().hasError() &&
- "Symbol is already in the error state?");
- DepSym.setFlags(DepSym.getFlags() | JITSymbolFlags::HasError);
- (*FailedSymbolsMap)[&DepJD].insert(SymbolStringPtr(DepName));
-
- // This symbol has a defining EDU so its MaterializingInfo object must
- // exist.
- auto DepMIItr =
- DepJD.MaterializingInfos.find(SymbolStringPtr(DepName));
- assert(DepMIItr != DepJD.MaterializingInfos.end() &&
- "Symbol has defining EDU but not MaterializingInfo");
- auto &DepMI = DepMIItr->second;
- assert(DepMI.DefiningEDU.get() == DependantEDU &&
- "Bad EDU dependence edge");
- assert(DepMI.DependantEDUs.empty() &&
- "Symbol was emitted, should not have any DependantEDUs");
- ExtractFailedQueries(DepMI);
- DepJD.MaterializingInfos.erase(SymbolStringPtr(DepName));
- }
+ auto FailedSNs = G.fail(ToFail);
- DepJD.shrinkMaterializationInfoMemory();
+ for (auto &SN : FailedSNs) {
+ for (auto &[FailJD, Defs] : SN->defs()) {
+ auto &FailedSymsForFailJD = (*FailedSymbolsMap)[FailJD];
+ for (auto &Def : Defs) {
+ FailedSymsForFailJD.insert(SymbolStringPtr(Def));
+ Fail(FailJD, Def);
}
-
- MI.DependantEDUs.clear();
}
-
- assert(!MI.DefiningEDU && "DefiningEDU should have been reset");
- assert(MI.DependantEDUs.empty() &&
- "DependantEDUs should have been removed above");
- assert(!MI.hasQueriesPending() &&
- "Can not delete MaterializingInfo with queries pending");
- JD.MaterializingInfos.erase(Name);
}
- JD.shrinkMaterializationInfoMemory();
+ // Detach all failed queries.
+ for (auto &Q : FailedQueries)
+ Q->detach();
#ifdef EXPENSIVE_CHECKS
verifySessionState("exiting ExecutionSession::IL_failSymbols");
@@ -3721,9 +3169,11 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) {
return IL_failSymbols(MR.getTargetJITDylib(), SymbolsToFail);
});
- for (auto &Q : FailedQueries)
+ for (auto &Q : FailedQueries) {
+ Q->detach();
Q->handleFailed(
make_error<FailedToMaterialize>(getSymbolStringPool(), FailedSymbols));
+ }
}
Error ExecutionSession::OL_replace(MaterializationResponsibility &MR,
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
index dec1df7..893523c 100644
--- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -448,7 +448,7 @@ Error SimpleRemoteEPC::handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes) {
if (const char *ErrMsg = WFR.getOutOfBandError())
return make_error<StringError>(ErrMsg, inconvertibleErrorCode());
- detail::SPSSerializableError Info;
+ orc::shared::detail::SPSSerializableError Info;
SPSInputBuffer IB(WFR.data(), WFR.size());
if (!SPSArgList<SPSError>::deserialize(IB, Info))
return make_error<StringError>("Could not deserialize hangup info",
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 7ea2e46..77af29b 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -21,9 +21,6 @@ using namespace RTLIB;
#define GET_SET_TARGET_RUNTIME_LIBCALL_SETS
#define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
#include "llvm/IR/RuntimeLibcalls.inc"
-#undef GET_INIT_RUNTIME_LIBCALL_NAMES
-#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
-#undef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
/// Set default libcall names. If a target wants to opt-out of a libcall it
/// should be placed here.
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 7f0ea78..d4901d9 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -2903,7 +2903,7 @@ bool MasmParser::parseIdentifier(StringRef &Res,
if (Position == StartOfStatement &&
StringSwitch<bool>(Res)
.CaseLower("echo", true)
- .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
+ .CasesLower({"ifdef", "ifndef", "elseifdef", "elseifndef"}, true)
.Default(false)) {
ExpandNextToken = DoNotExpandMacros;
}
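
Both this hunk and the RISCVCustomBehaviour change below switch from the variadic
Cases/CasesLower forms to the initializer-list overloads shown here. A minimal
usage sketch of the new overload (the directive strings are hypothetical):

    #include "llvm/ADT/StringSwitch.h"
    using namespace llvm;

    // Returns true for any listed spelling, compared case-insensitively.
    static bool isConditionalDirective(StringRef Directive) {
      return StringSwitch<bool>(Directive)
          .CasesLower({"if", "ifb", "ifnb"}, true)
          .Default(false);
    }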
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 12c600f..d5117da 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -700,7 +700,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
- unsigned *NewVReg = nullptr) {
+ unsigned *NewReg = nullptr) {
VReg = removeCopies(MRI, VReg);
if (!Register::isVirtualRegister(VReg))
return 0;
@@ -708,8 +708,37 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned Opc = 0;
- unsigned SrcOpNum = 0;
+ unsigned SrcReg = 0;
switch (DefMI->getOpcode()) {
+ case AArch64::SUBREG_TO_REG:
+ // Check for the following way to define a 64-bit immediate:
+ // %0:gpr32 = MOVi32imm 1
+ // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
+ return 0;
+ if (!DefMI->getOperand(2).isReg())
+ return 0;
+ if (!DefMI->getOperand(3).isImm() ||
+ DefMI->getOperand(3).getImm() != AArch64::sub_32)
+ return 0;
+ DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
+ if (DefMI->getOpcode() != AArch64::MOVi32imm)
+ return 0;
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+ return 0;
+ assert(Is64Bit);
+ SrcReg = AArch64::XZR;
+ Opc = AArch64::CSINCXr;
+ break;
+
+ case AArch64::MOVi32imm:
+ case AArch64::MOVi64imm:
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+ return 0;
+ SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+ break;
+
case AArch64::ADDSXri:
case AArch64::ADDSWri:
// if NZCV is used, do not fold.
@@ -724,7 +753,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
DefMI->getOperand(3).getImm() != 0)
return 0;
- SrcOpNum = 1;
+ SrcReg = DefMI->getOperand(1).getReg();
Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
break;
@@ -734,7 +763,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
- SrcOpNum = 2;
+ SrcReg = DefMI->getOperand(2).getReg();
Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
break;
}
@@ -753,17 +782,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
- SrcOpNum = 2;
+ SrcReg = DefMI->getOperand(2).getReg();
Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
break;
}
default:
return 0;
}
- assert(Opc && SrcOpNum && "Missing parameters");
+ assert(Opc && SrcReg && "Missing parameters");
- if (NewVReg)
- *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+ if (NewReg)
+ *NewReg = SrcReg;
return Opc;
}
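
A hedged sketch of the fold the new SUBREG_TO_REG/MOVi32imm cases enable
(registers and condition are hypothetical): a select between a value and the
constant 1 no longer needs the materialized 1, because csinc can synthesize it
from the zero register.

    // Before: the constant 1 is materialized into w2.
    //   mov   w2, #1
    //   csel  w0, w1, w2, eq      // w0 = eq ? w1 : 1
    // After: fold the 1 as wzr + 1 on the false path.
    //   csinc w0, w1, wzr, eq     // w0 = eq ? w1 : wzr + 1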
@@ -964,28 +993,34 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
// Try folding simple instructions into the csel.
if (TryFold) {
- unsigned NewVReg = 0;
- unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
+ unsigned NewReg = 0;
+ unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
// FalseReg, so we need to invert the condition.
CC = AArch64CC::getInvertedCondCode(CC);
TrueReg = FalseReg;
} else
- FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
+ FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
// Fold the operation. Leave any dead instructions for DCE to clean up.
if (FoldedOpc) {
- FalseReg = NewVReg;
+ FalseReg = NewReg;
Opc = FoldedOpc;
- // The extends the live range of NewVReg.
- MRI.clearKillFlags(NewVReg);
+ // Extend the live range of NewReg.
+ MRI.clearKillFlags(NewReg);
}
}
// Pull all virtual register into the appropriate class.
MRI.constrainRegClass(TrueReg, RC);
- MRI.constrainRegClass(FalseReg, RC);
+ // FalseReg might be WZR or XZR if the folded operand is a literal 1.
+ assert(
+ (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
+ FalseReg == AArch64::XZR) &&
+ "FalseReg was folded into a non-virtual register other than WZR or XZR");
+ if (FalseReg.isVirtual())
+ MRI.constrainRegClass(FalseReg, RC);
// Insert the csel.
BuildMI(MBB, I, DL, get(Opc), DstReg)
@@ -5063,7 +5098,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool RenamableDest,
bool RenamableSrc) const {
if (AArch64::GPR32spRegClass.contains(DestReg) &&
- (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
+ AArch64::GPR32spRegClass.contains(SrcReg)) {
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
// If either operand is WSP, expand to ADD #0.
if (Subtarget.hasZeroCycleRegMoveGPR64() &&
@@ -5088,21 +5123,14 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
- } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
- BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
- .addImm(0)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
!Subtarget.hasZeroCycleRegMoveGPR32()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
&AArch64::GPR64spRegClass);
assert(DestRegX.isValid() && "Destination super-reg not valid");
- MCRegister SrcRegX =
- SrcReg == AArch64::WZR
- ? AArch64::XZR
- : RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
- &AArch64::GPR64spRegClass);
+ MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
assert(SrcRegX.isValid() && "Source super-reg not valid");
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
@@ -5121,6 +5149,51 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // GPR32 zeroing
+ if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
+ if (Subtarget.hasZeroCycleZeroingGPR32()) {
+ BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
+ .addReg(AArch64::WZR)
+ .addReg(AArch64::WZR);
+ }
+ return;
+ }
+
+ if (AArch64::GPR64spRegClass.contains(DestReg) &&
+ AArch64::GPR64spRegClass.contains(SrcReg)) {
+ if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
+ // If either operand is SP, expand to ADD #0.
+ BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ // Otherwise, expand to ORR XZR.
+ BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
+ .addReg(AArch64::XZR)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+ return;
+ }
+
+ // GPR64 zeroing
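+  // Emits "movz xN, #0" when zero-cycle zeroing is available; otherwise an
+  // "orr xN, xzr, xzr" idiom.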
+ if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
+ if (Subtarget.hasZeroCycleZeroingGPR64()) {
+ BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::XZR);
+ }
+ return;
+ }
+
// Copy a Predicate register by ORRing with itself.
if (AArch64::PPRRegClass.contains(DestReg) &&
AArch64::PPRRegClass.contains(SrcReg)) {
@@ -5205,27 +5278,6 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- if (AArch64::GPR64spRegClass.contains(DestReg) &&
- (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
- if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
- // If either operand is SP, expand to ADD #0.
- BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
- } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
- BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
- .addImm(0)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
- } else {
- // Otherwise, expand to ORR XZR.
- BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
- .addReg(AArch64::XZR)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
- return;
- }
-
// Copy a DDDD register quad by copying the individual sub-registers.
if (AArch64::DDDDRegClass.contains(DestReg) &&
AArch64::DDDDRegClass.contains(SrcReg)) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 479e345..e3370d3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5722,7 +5722,7 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
}
// Add additional cost for the extends that would need to be inserted.
- return Cost + 4;
+ return Cost + 2;
}
InstructionCost
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 12915c73..97c2c9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3446,10 +3446,14 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
: 0); // swz
MachineMemOperand *LoadMMO = *MI.memoperands_begin();
+ // Don't set the offset value here because the pointer points to the base of
+ // the buffer.
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
- LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
+
MachinePointerInfo StorePtrI = LoadPtrI;
- StorePtrI.V = nullptr;
+ LoadPtrI.V = PoisonValue::get(PointerType::get(MF->getFunction().getContext(),
+ AMDGPUAS::BUFFER_RESOURCE));
+ LoadPtrI.AddrSpace = AMDGPUAS::BUFFER_RESOURCE;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
@@ -3627,13 +3631,17 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
if (isSGPR(Addr))
MIB.addReg(VOffset);
- MIB.add(MI.getOperand(4)) // offset
- .add(MI.getOperand(5)); // cpol
+ MIB.add(MI.getOperand(4)); // offset
+
+ unsigned Aux = MI.getOperand(5).getImm();
+ MIB.addImm(Aux & ~AMDGPU::CPol::VIRTUAL_BITS); // cpol
MachineMemOperand *LoadMMO = *MI.memoperands_begin();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
LoadPtrI.Offset = MI.getOperand(4).getImm();
MachinePointerInfo StorePtrI = LoadPtrI;
+ LoadPtrI.V = PoisonValue::get(PointerType::get(MF->getFunction().getContext(),
+ AMDGPUAS::GLOBAL_ADDRESS));
LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index ecc2824..b7a92a0 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -423,6 +423,9 @@ enum CPol {
// Volatile (used to preserve/signal operation volatility for buffer
// operations not a real instruction bit)
VOLATILE = 1 << 31,
+ // The set of "cache policy" bits used for compiler features that
+ // do not correspond to hardware features.
+ VIRTUAL_BITS = VOLATILE,
};
} // namespace CPol
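
The new mask exists so that lowering code can strip compiler-only bits before a
value is encoded as a real cache-policy immediate, as the SIISelLowering and
instruction-selector hunks elsewhere in this commit do. A minimal sketch of
that pattern (the helper name is hypothetical):

    // Compiler-only policy bits (currently just VOLATILE) must never be
    // encoded into an instruction's cpol operand.
    static unsigned stripVirtualCPolBits(unsigned Aux) {
      return Aux & ~AMDGPU::CPol::VIRTUAL_BITS;
    }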
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a2841c11..a757421 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1651,6 +1651,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
Info.ptrVal = CI.getArgOperand(1);
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
+ if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
+ Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
@@ -11219,8 +11222,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
MachinePointerInfo StorePtrI = LoadPtrI;
LoadPtrI.V = PoisonValue::get(
- PointerType::get(*DAG.getContext(), AMDGPUAS::GLOBAL_ADDRESS));
- LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
+ PointerType::get(*DAG.getContext(), AMDGPUAS::BUFFER_RESOURCE));
+ LoadPtrI.AddrSpace = AMDGPUAS::BUFFER_RESOURCE;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
@@ -11307,7 +11310,11 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
Ops.push_back(Op.getOperand(5)); // Offset
- Ops.push_back(Op.getOperand(6)); // CPol
+
+ unsigned Aux = Op.getConstantOperandVal(6);
+ Ops.push_back(DAG.getTargetConstant(Aux & ~AMDGPU::CPol::VIRTUAL_BITS, DL,
+ MVT::i32)); // CPol
+
Ops.push_back(M0Val.getValue(0)); // Chain
Ops.push_back(M0Val.getValue(1)); // Glue
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 50447f4..2ff2d2f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4032,28 +4032,31 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
}
}
+/// Helper struct for the implementation of 3-address conversion to communicate
+/// updates made to instruction operands.
+struct SIInstrInfo::ThreeAddressUpdates {
+ /// Other instruction whose def is no longer used by the converted
+ /// instruction.
+ MachineInstr *RemoveMIUse = nullptr;
+};
+
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
MachineBasicBlock &MBB = *MI.getParent();
- unsigned Opc = MI.getOpcode();
+ ThreeAddressUpdates U;
+ MachineInstr *NewMI = convertToThreeAddressImpl(MI, U);
- // Handle MFMA.
- int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc);
- if (NewMFMAOpc != -1) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
- MIB.add(MI.getOperand(I));
- updateLiveVariables(LV, MI, *MIB);
+ if (NewMI) {
+ updateLiveVariables(LV, MI, *NewMI);
if (LIS) {
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
// SlotIndex of defs needs to be updated when converting to early-clobber
- MachineOperand &Def = MIB->getOperand(0);
+ MachineOperand &Def = NewMI->getOperand(0);
if (Def.isEarlyClobber() && Def.isReg() &&
LIS->hasInterval(Def.getReg())) {
- SlotIndex OldIndex = LIS->getInstructionIndex(*MIB).getRegSlot(false);
- SlotIndex NewIndex = LIS->getInstructionIndex(*MIB).getRegSlot(true);
+ SlotIndex OldIndex = LIS->getInstructionIndex(*NewMI).getRegSlot(false);
+ SlotIndex NewIndex = LIS->getInstructionIndex(*NewMI).getRegSlot(true);
auto &LI = LIS->getInterval(Def.getReg());
auto UpdateDefIndex = [&](LiveRange &LR) {
auto *S = LR.find(OldIndex);
@@ -4068,6 +4071,58 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
UpdateDefIndex(SR);
}
}
+ }
+
+ if (U.RemoveMIUse) {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ // The only user is the instruction which will be killed.
+ Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
+
+ if (MRI.hasOneNonDBGUse(DefReg)) {
+      // We cannot just remove the DefMI here; the calling pass would crash.
+ U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
+ U.RemoveMIUse->getOperand(0).setIsDead(true);
+ for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
+ U.RemoveMIUse->removeOperand(I);
+ if (LV)
+ LV->getVarInfo(DefReg).AliveBlocks.clear();
+ }
+
+ if (LIS) {
+ LiveInterval &DefLI = LIS->getInterval(DefReg);
+
+ // We cannot delete the original instruction here, so hack out the use
+ // in the original instruction with a dummy register so we can use
+ // shrinkToUses to deal with any multi-use edge cases. Other targets do
+ // not have the complexity of deleting a use to consider here.
+ Register DummyReg = MRI.cloneVirtualRegister(DefReg);
+ for (MachineOperand &MIOp : MI.uses()) {
+ if (MIOp.isReg() && MIOp.getReg() == DefReg) {
+ MIOp.setIsUndef(true);
+ MIOp.setReg(DummyReg);
+ }
+ }
+
+ LIS->shrinkToUses(&DefLI);
+ }
+ }
+
+ return NewMI;
+}
+
+MachineInstr *
+SIInstrInfo::convertToThreeAddressImpl(MachineInstr &MI,
+ ThreeAddressUpdates &U) const {
+ MachineBasicBlock &MBB = *MI.getParent();
+ unsigned Opc = MI.getOpcode();
+
+ // Handle MFMA.
+ int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc);
+ if (NewMFMAOpc != -1) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+ MIB.add(MI.getOperand(I));
return MIB;
}
@@ -4077,11 +4132,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.setMIFlags(MI.getFlags());
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
MIB->addOperand(MI.getOperand(I));
-
- updateLiveVariables(LV, MI, *MIB);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
-
return MIB;
}
@@ -4152,39 +4202,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
- const auto killDef = [&]() -> void {
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- // The only user is the instruction which will be killed.
- Register DefReg = DefMI->getOperand(0).getReg();
-
- if (MRI.hasOneNonDBGUse(DefReg)) {
- // We cannot just remove the DefMI here, calling pass will crash.
- DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
- DefMI->getOperand(0).setIsDead(true);
- for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
- DefMI->removeOperand(I);
- if (LV)
- LV->getVarInfo(DefReg).AliveBlocks.clear();
- }
-
- if (LIS) {
- LiveInterval &DefLI = LIS->getInterval(DefReg);
-
- // We cannot delete the original instruction here, so hack out the use
- // in the original instruction with a dummy register so we can use
- // shrinkToUses to deal with any multi-use edge cases. Other targets do
- // not have the complexity of deleting a use to consider here.
- Register DummyReg = MRI.cloneVirtualRegister(DefReg);
- for (MachineOperand &MIOp : MI.uses()) {
- if (MIOp.isReg() && MIOp.getReg() == DefReg) {
- MIOp.setIsUndef(true);
- MIOp.setReg(DummyReg);
- }
- }
-
- LIS->shrinkToUses(&DefLI);
- }
- };
int64_t Imm;
if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
@@ -4196,10 +4213,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Src1)
.addImm(Imm)
.setMIFlags(MI.getFlags());
- updateLiveVariables(LV, MI, *MIB);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ U.RemoveMIUse = DefMI;
return MIB;
}
}
@@ -4212,11 +4226,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.addImm(Imm)
.add(*Src2)
.setMIFlags(MI.getFlags());
- updateLiveVariables(LV, MI, *MIB);
-
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ U.RemoveMIUse = DefMI;
return MIB;
}
}
@@ -4235,12 +4245,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.addImm(Imm)
.add(*Src2)
.setMIFlags(MI.getFlags());
- updateLiveVariables(LV, MI, *MIB);
-
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- if (DefMI)
- killDef();
+ U.RemoveMIUse = DefMI;
return MIB;
}
}
@@ -4269,9 +4274,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.setMIFlags(MI.getFlags());
if (AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel))
MIB.addImm(OpSel ? OpSel->getImm() : 0);
- updateLiveVariables(LV, MI, *MIB);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index df27ec1..e1d7a07 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -88,6 +88,8 @@ private:
};
class SIInstrInfo final : public AMDGPUGenInstrInfo {
+ struct ThreeAddressUpdates;
+
private:
const SIRegisterInfo RI;
const GCNSubtarget &ST;
@@ -190,6 +192,9 @@ private:
bool resultDependsOnExec(const MachineInstr &MI) const;
+ MachineInstr *convertToThreeAddressImpl(MachineInstr &MI,
+ ThreeAddressUpdates &Updates) const;
+
protected:
   /// If the specific machine instruction is an instruction that moves/copies
/// value from one register to another register return destination and source
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 362ef14..bdbc000 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -277,6 +278,12 @@ public:
/// rmw operation, "std::nullopt" otherwise.
std::optional<SIMemOpInfo>
getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
+
+  /// \returns DMA to LDS info if \p MI is a direct-to/from-LDS load/store,
+ /// along with an indication of whether this is a load or store. If it is not
+ /// a direct-to-LDS operation, returns std::nullopt.
+ std::optional<SIMemOpInfo>
+ getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const;
};
class SICacheControl {
@@ -703,6 +710,9 @@ private:
/// instructions are added/deleted or \p MI is modified, false otherwise.
bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI);
+ /// Expands LDS DMA operation \p MI. Returns true if instructions are
+ /// added/deleted or \p MI is modified, false otherwise.
+ bool expandLDSDMA(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);
public:
SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {};
@@ -832,6 +842,9 @@ SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
return SIAtomicAddrSpace::SCRATCH;
if (AS == AMDGPUAS::REGION_ADDRESS)
return SIAtomicAddrSpace::GDS;
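+  // Buffer accesses ultimately address global memory through a descriptor,
+  // so treat the buffer address spaces as GLOBAL for memory-model purposes.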
+ if (AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE ||
+ AS == AMDGPUAS::BUFFER_STRIDED_POINTER)
+ return SIAtomicAddrSpace::GLOBAL;
return SIAtomicAddrSpace::OTHER;
}
@@ -987,6 +1000,16 @@ std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
return constructFromMIWithMMO(MI);
}
+std::optional<SIMemOpInfo>
+SIMemOpAccess::getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const {
+ assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+ if (!SIInstrInfo::isLDSDMA(*MI))
+ return std::nullopt;
+
+ return constructFromMIWithMMO(MI);
+}
+
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
TII = ST.getInstrInfo();
IV = getIsaVersion(ST.getCPU());
@@ -1099,7 +1122,7 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
  // Only handle load and store, not atomic read-modify-write instructions. The
// latter use glc to indicate if the atomic returns a result and so must not
// be used for cache control.
- assert(MI->mayLoad() ^ MI->mayStore());
+ assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI));
// Only update load and store, not LLVM IR atomic read-modify-write
// instructions. The latter are always marked as volatile so cannot sensibly
@@ -1429,7 +1452,7 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
  // Only handle load and store, not atomic read-modify-write instructions. The
// latter use glc to indicate if the atomic returns a result and so must not
// be used for cache control.
- assert(MI->mayLoad() ^ MI->mayStore());
+ assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI));
// Only update load and store, not LLVM IR atomic read-modify-write
// instructions. The latter are always marked as volatile so cannot sensibly
@@ -1733,7 +1756,7 @@ bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
  // Only handle load and store, not atomic read-modify-write instructions. The
// latter use glc to indicate if the atomic returns a result and so must not
// be used for cache control.
- assert(MI->mayLoad() ^ MI->mayStore());
+ assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI));
// Only update load and store, not LLVM IR atomic read-modify-write
// instructions. The latter are always marked as volatile so cannot sensibly
@@ -1968,7 +1991,7 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
  // Only handle load and store, not atomic read-modify-write instructions. The
// latter use glc to indicate if the atomic returns a result and so must not
// be used for cache control.
- assert(MI->mayLoad() ^ MI->mayStore());
+ assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI));
// Only update load and store, not LLVM IR atomic read-modify-write
// instructions. The latter are always marked as volatile so cannot sensibly
@@ -2266,7 +2289,7 @@ bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
  // Only handle load and store, not atomic read-modify-write instructions. The
// latter use glc to indicate if the atomic returns a result and so must not
// be used for cache control.
- assert(MI->mayLoad() ^ MI->mayStore());
+ assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI));
// Only update load and store, not LLVM IR atomic read-modify-write
// instructions. The latter are always marked as volatile so cannot sensibly
@@ -2611,7 +2634,7 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
// Only handle load and store, not atomic read-modify-write instructions.
- assert(MI->mayLoad() ^ MI->mayStore());
+ assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI));
// Only update load and store, not LLVM IR atomic read-modify-write
// instructions. The latter are always marked as volatile so cannot sensibly
@@ -2934,6 +2957,23 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
return Changed;
}
+bool SIMemoryLegalizer::expandLDSDMA(const SIMemOpInfo &MOI,
+ MachineBasicBlock::iterator &MI) {
+ assert(MI->mayLoad() && MI->mayStore());
+
+  // The volatility or nontemporal-ness of the operation is a
+  // function of the global memory access, not the LDS access.
+ SIMemOp OpKind =
+ SIInstrInfo::mayWriteLDSThroughDMA(*MI) ? SIMemOp::LOAD : SIMemOp::STORE;
+
+ // Handle volatile and/or nontemporal markers on direct-to-LDS loads and
+ // stores. The operation is treated as a volatile/nontemporal store
+ // to its second argument.
+ return CC->enableVolatileAndOrNonTemporal(
+ MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(),
+ MOI.isNonTemporal(), MOI.isLastUse());
+}
+
bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
const MachineModuleInfo &MMI =
getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
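
For context, a hedged IR-level sketch of the kind of operation expandLDSDMA
covers; per the SIDefines hunk above, bit 31 of the aux operand is the
compiler-side VOLATILE marker (operand values here are illustrative):

    ; Copy 4 bytes from global memory directly into LDS. The volatile bit in
    ; the aux operand applies to the global side of the transfer.
    call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr,
        ptr addrspace(3) %lptr, i32 4, i32 0, i32 -2147483648)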
@@ -2985,14 +3025,17 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) {
if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
continue;
- if (const auto &MOI = MOA.getLoadInfo(MI))
+ if (const auto &MOI = MOA.getLoadInfo(MI)) {
Changed |= expandLoad(*MOI, MI);
- else if (const auto &MOI = MOA.getStoreInfo(MI)) {
+ } else if (const auto &MOI = MOA.getStoreInfo(MI)) {
Changed |= expandStore(*MOI, MI);
- } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
+ } else if (const auto &MOI = MOA.getLDSDMAInfo(MI)) {
+ Changed |= expandLDSDMA(*MOI, MI);
+ } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) {
Changed |= expandAtomicFence(*MOI, MI);
- else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
+ } else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) {
Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
+ }
}
}
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index ba4b489..9b5fc9d 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -14,6 +14,7 @@
#include "BPF.h"
#include "BPFCORE.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -23,6 +24,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -301,21 +303,59 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
BTFType.NameOff = BDebug.addString(STy->getName());
+ if (STy->getTag() == dwarf::DW_TAG_variant_part) {
+    // Variant parts might have a discriminator, which has its own memory
+    // location, and variants, which share the memory location after it. LLVM
+    // DI doesn't consider the discriminator an element and instead keeps
+    // it as a separate reference.
+    // To keep BTF simple, represent the structure as a union with the
+    // discriminator as the first member.
+ // The offsets inside variant types are already handled correctly in the
+ // DI.
+ const auto *DTy = STy->getDiscriminator();
+ if (DTy) {
+ struct BTF::BTFMember Discriminator;
+
+ Discriminator.NameOff = BDebug.addString(DTy->getName());
+ Discriminator.Offset = DTy->getOffsetInBits();
+ const auto *BaseTy = DTy->getBaseType();
+ Discriminator.Type = BDebug.getTypeId(BaseTy);
+
+ Members.push_back(Discriminator);
+ }
+ }
+
// Add struct/union members.
const DINodeArray Elements = STy->getElements();
for (const auto *Element : Elements) {
struct BTF::BTFMember BTFMember;
- const auto *DDTy = cast<DIDerivedType>(Element);
- BTFMember.NameOff = BDebug.addString(DDTy->getName());
- if (HasBitField) {
- uint8_t BitFieldSize = DDTy->isBitField() ? DDTy->getSizeInBits() : 0;
- BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits();
- } else {
- BTFMember.Offset = DDTy->getOffsetInBits();
+ switch (Element->getTag()) {
+ case dwarf::DW_TAG_member: {
+ const auto *DDTy = cast<DIDerivedType>(Element);
+
+ BTFMember.NameOff = BDebug.addString(DDTy->getName());
+ if (HasBitField) {
+ uint8_t BitFieldSize = DDTy->isBitField() ? DDTy->getSizeInBits() : 0;
+ BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits();
+ } else {
+ BTFMember.Offset = DDTy->getOffsetInBits();
+ }
+ const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType());
+ BTFMember.Type = BDebug.getTypeId(BaseTy);
+ break;
+ }
+ case dwarf::DW_TAG_variant_part: {
+ const auto *DCTy = dyn_cast<DICompositeType>(Element);
+
+ BTFMember.NameOff = BDebug.addString(DCTy->getName());
+ BTFMember.Offset = DCTy->getOffsetInBits();
+ BTFMember.Type = BDebug.getTypeId(DCTy);
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected DI tag of a struct/union element");
}
- const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType());
- BTFMember.Type = BDebug.getTypeId(BaseTy);
Members.push_back(BTFMember);
}
}
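
As a hedged illustration of the chosen encoding (the source type is
hypothetical, not from the patch): for a Rust-style discriminated union such as
"enum Event { Key(u32), Click { x: u16, y: u16 } }", the code above flattens
the DW_TAG_variant_part into a BTF union shaped roughly like:

    union event_variant_part {
      unsigned char discriminant;                // added as member 0 above
      struct { unsigned int key; }      key_v;   // variants overlay the
      struct { unsigned short x, y; }   click_v; // storage at their
    };                                           // DI-recorded offsets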
@@ -672,16 +712,28 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
uint32_t &TypeId) {
const DINodeArray Elements = CTy->getElements();
uint32_t VLen = Elements.size();
+  // Variant parts might have a discriminator. LLVM DI doesn't consider it
+  // an element and instead keeps it as a separate reference, but we
+  // represent it as an element in BTF.
+ if (CTy->getTag() == dwarf::DW_TAG_variant_part) {
+ const auto *DTy = CTy->getDiscriminator();
+ if (DTy) {
+ visitTypeEntry(DTy);
+ VLen++;
+ }
+ }
if (VLen > BTF::MAX_VLEN)
return;
// Check whether we have any bitfield members or not
bool HasBitField = false;
for (const auto *Element : Elements) {
- auto E = cast<DIDerivedType>(Element);
- if (E->isBitField()) {
- HasBitField = true;
- break;
+ if (Element->getTag() == dwarf::DW_TAG_member) {
+ auto E = cast<DIDerivedType>(Element);
+ if (E->isBitField()) {
+ HasBitField = true;
+ break;
+ }
}
}
@@ -696,9 +748,22 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
// Visit all struct members.
int FieldNo = 0;
for (const auto *Element : Elements) {
- const auto Elem = cast<DIDerivedType>(Element);
- visitTypeEntry(Elem);
- processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo);
+ switch (Element->getTag()) {
+ case dwarf::DW_TAG_member: {
+ const auto Elem = cast<DIDerivedType>(Element);
+ visitTypeEntry(Elem);
+ processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo);
+ break;
+ }
+ case dwarf::DW_TAG_variant_part: {
+ const auto Elem = cast<DICompositeType>(Element);
+ visitTypeEntry(Elem);
+ processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo);
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected DI tag of a struct/union element");
+ }
FieldNo++;
}
}
@@ -781,16 +846,25 @@ void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion,
void BTFDebug::visitCompositeType(const DICompositeType *CTy,
uint32_t &TypeId) {
auto Tag = CTy->getTag();
- if (Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ switch (Tag) {
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_variant_part:
// Handle forward declaration differently as it does not have members.
if (CTy->isForwardDecl())
visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type, TypeId);
else
visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type, TypeId);
- } else if (Tag == dwarf::DW_TAG_array_type)
+ break;
+ case dwarf::DW_TAG_array_type:
visitArrayType(CTy, TypeId);
- else if (Tag == dwarf::DW_TAG_enumeration_type)
+ break;
+ case dwarf::DW_TAG_enumeration_type:
visitEnumType(CTy, TypeId);
+ break;
+ default:
+ llvm_unreachable("Unexpected DI tag of a composite type");
+ }
}
bool BTFDebug::IsForwardDeclCandidate(const DIType *Base) {
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index d758260..1a5f096 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
HexagonOptAddrMode.cpp
HexagonOptimizeSZextends.cpp
HexagonPeephole.cpp
+ HexagonQFPOptimizer.cpp
HexagonRDFOpt.cpp
HexagonRegisterInfo.cpp
HexagonSelectionDAGInfo.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 109aba5..422ab20 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
void initializeHexagonVectorPrintPass(PassRegistry &);
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+
Pass *createHexagonLoopIdiomPass();
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
FunctionPass *createHexagonVectorPrint();
FunctionPass *createHexagonVExtract();
FunctionPass *createHexagonExpandCondsets();
+FunctionPass *createHexagonQFPOptimizer();
} // end namespace llvm;
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 4ddbe7a..ff876f6 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -920,6 +920,10 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
// successors have been processed.
RegisterSet BlockDefs, InsDefs;
for (MachineInstr &MI : *B) {
+ // Stop if the map size is too large.
+ if (IFMap.size() >= MaxIFMSize)
+ break;
+
InsDefs.clear();
getInstrDefs(&MI, InsDefs);
// Leave those alone. They are more transparent than "insert".
@@ -942,8 +946,8 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
findRecordInsertForms(VR, AVs);
// Stop if the map size is too large.
- if (IFMap.size() > MaxIFMSize)
- return;
+ if (IFMap.size() >= MaxIFMSize)
+ break;
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
new file mode 100644
index 0000000..479ac90
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -0,0 +1,334 @@
+//===-- HexagonQFPOptimizer.cpp - QFP to IEEE-FP conversion optimizer ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic infrastructure for optimizing intermediate conversion instructions
+// generated while performing vector floating point operations.
+// Currently run at the start of code generation for Hexagon, it cleans up
+// redundant conversion instructions and replaces their uses with the
+// appropriate machine operand. Liveness is preserved after this pass.
+//
+// @note: The redundant conversion instructions are not eliminated in this
+// pass. We only replace the uses of conversion instructions with their
+// appropriate QFP instruction, and leave it to the Dead Instruction
+// Elimination pass to remove the now-dead conversions.
+//
+// Brief overview of how this QFP optimizer works:
+// The pass iterates over each instruction and checks whether it belongs to
+// the Hexagon floating point HVX arithmetic instruction category (add, sub,
+// mul). It then finds the unique definition of each machine operand of the
+// instruction.
+//
+// Example:
+// Let MachineInstr *MI be an HVX vadd instruction:
+// MI -> $v0 = V6_vadd_sf $v1, $v2
+// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+//
+// Here, DefMI1 and DefMI2 give the unique definitions corresponding to the
+// operands ($v1 and $v2 respectively) of instruction MI.
+//
+// If neither definition is a conversion instruction (V6_vconv_sf_qf32,
+// V6_vconv_hf_qf16), the pass skips the current instruction and moves on to
+// the next one.
+//
+// If one of the definitions is a conversion instruction, the pass replaces
+// the arithmetic instruction with its corresponding mix variant.
+// In the above example, if $v1 is defined by a conversion instruction
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// After transformation:
+// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
+//
+// If both definitions are conversion instructions, the instruction is
+// replaced with its qf variant.
+// In the above example, if $v1 and $v2 are defined by conversion instructions
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
+// After transformation:
+// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is
+// replaced with $v4)
+//
+// This pass does not currently handle the case where the definitions are
+// PHI instructions.
+//
+//===----------------------------------------------------------------------===//
+#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <vector>
+
+#define DEBUG_TYPE "hexagon-qfp-optimizer"
+
+using namespace llvm;
+
+cl::opt<bool>
+ DisableQFOptimizer("disable-qfp-opt", cl::init(false),
+ cl::desc("Disable optimization of Qfloat operations."));
+
+namespace {
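+// Note on the table's shape: for add/sub/mpy the IEEE opcode maps to its
+// "mix" variant and the mix variant maps to the pure qf variant, so the
+// both-operands-converted case in optimizeQfp chains two lookups, e.g.
+//   V6_vadd_sf -> V6_vadd_qf32_mix -> V6_vadd_qf32.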
+const std::map<unsigned short, unsigned short> QFPInstMap{
+ {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
+ {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
+ {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
+ {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
+ {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
+ {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
+ {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
+ {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
+ {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
+ {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
+ {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
+ {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
+} // namespace
+
+namespace llvm {
+
+FunctionPass *createHexagonQFPOptimizer();
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+
+} // namespace llvm
+
+namespace {
+
+struct HexagonQFPOptimizer : public MachineFunctionPass {
+public:
+ static char ID;
+
+ HexagonQFPOptimizer() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ const HexagonSubtarget *HST = nullptr;
+ const HexagonInstrInfo *HII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+};
+
+char HexagonQFPOptimizer::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer",
+ HEXAGON_QFP_OPTIMIZER, false, false)
+
+FunctionPass *llvm::createHexagonQFPOptimizer() {
+ return new HexagonQFPOptimizer();
+}
+
+bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+
+ // Early exit:
+  // - if the instruction has too few operands (QFP ops need 2 sources
+  //   + 1 dest),
+  // - or if it has no transformation mapping.
+ if (MI->getNumOperands() < 3)
+ return false;
+ auto It = QFPInstMap.find(MI->getOpcode());
+ if (It == QFPInstMap.end())
+ return false;
+ unsigned short InstTy = It->second;
+
+ unsigned Op0F = 0;
+ unsigned Op1F = 0;
+  // Get the reaching defs (DefMI1, DefMI2) of MI's source operands.
+ MachineInstr *DefMI1 = nullptr;
+ MachineInstr *DefMI2 = nullptr;
+
+ if (MI->getOperand(1).isReg())
+ DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (MI->getOperand(2).isReg())
+ DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+ if (!DefMI1 || !DefMI2)
+ return false;
+
+ MachineOperand &Res = MI->getOperand(0);
+ MachineInstr *Inst1 = nullptr;
+ MachineInstr *Inst2 = nullptr;
+ LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
+ DefMI2->dump());
+
+  // Get the reaching defs of DefMI1 and DefMI2.
+ if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() &&
+ DefMI1->getOperand(1).getReg().isVirtual())
+ Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg());
+
+ if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() &&
+ DefMI2->getOperand(1).getReg().isVirtual())
+ Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg());
+
+ unsigned Def1OP = DefMI1->getOpcode();
+ unsigned Def2OP = DefMI2->getOpcode();
+
+ MachineInstrBuilder MIB;
+ // Case 1: Both reaching defs of MI are qf to sf/hf conversions
+ if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP == Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP == Hexagon::V6_vconv_hf_qf16)) {
+
+ // If the reaching defs of DefMI are W register type, we return
+ if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(0).isReg() &&
+ MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass) ||
+ (Inst2 && Inst2->getNumOperands() > 0 && Inst2->getOperand(0).isReg() &&
+ MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass))
+ return false;
+
+ // Analyze the use operands of the conversion to get their KILL status
+ MachineOperand &Src1 = DefMI1->getOperand(1);
+ MachineOperand &Src2 = DefMI2->getOperand(1);
+
+ Op0F = getKillRegState(Src1.isKill());
+ Src1.setIsKill(false);
+
+ Op1F = getKillRegState(Src2.isKill());
+ Src2.setIsKill(false);
+
+ if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) {
+ auto OuterIt = QFPInstMap.find(MI->getOpcode());
+ if (OuterIt == QFPInstMap.end())
+ return false;
+ auto InnerIt = QFPInstMap.find(OuterIt->second);
+ if (InnerIt == QFPInstMap.end())
+ return false;
+ InstTy = InnerIt->second;
+ }
+
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+
+ // Case 2: Left operand is conversion to sf/hf
+ } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP != Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
+ !DefMI2->isPHI() &&
+ (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
+
+ if (Inst1 && MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ MachineOperand &Src1 = DefMI1->getOperand(1);
+ MachineOperand &Src2 = MI->getOperand(2);
+
+ Op0F = getKillRegState(Src1.isKill());
+ Src1.setIsKill(false);
+ Op1F = getKillRegState(Src2.isKill());
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+
+    // Case 3: Right operand is conversion to sf/hf
+ } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP == Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP != Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
+ !DefMI1->isPHI() &&
+ (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
+    // The second operand of the original instruction is the converted one.
+    // In "mix" instructions, the "qf" operand is always the first operand.
+
+ // Caveat: vsub is not commutative w.r.t operands.
+ if (InstTy == Hexagon::V6_vsub_qf16_mix ||
+ InstTy == Hexagon::V6_vsub_qf32_mix)
+ return false;
+
+ if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ MachineOperand &Src1 = MI->getOperand(1);
+ MachineOperand &Src2 = DefMI2->getOperand(1);
+
+ Op1F = getKillRegState(Src2.isKill());
+ Src2.setIsKill(false);
+ Op0F = getKillRegState(Src1.isKill());
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src2.getReg(), Op1F,
+ Src2.getSubReg()) // Notice the operands are flipped.
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+ }
+
+ return false;
+}
+
+bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
+
+ bool Changed = false;
+
+ if (DisableQFOptimizer)
+ return Changed;
+
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ if (!HST->useHVXV68Ops() || !HST->usePackets() ||
+ skipFunction(MF.getFunction()))
+ return false;
+ HII = HST->getInstrInfo();
+ MRI = &MF.getRegInfo();
+
+ MachineFunction::iterator MBBI = MF.begin();
+  LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimizer pass for: " << MF.getName()
+                    << " Optimize intermediate conversions ===\n");
+ while (MBBI != MF.end()) {
+ MachineBasicBlock *MBB = &*MBBI;
+ MachineBasicBlock::iterator MII = MBBI->instr_begin();
+ while (MII != MBBI->instr_end()) {
+ MachineInstr *MI = &*MII;
+ ++MII; // As MI might be removed.
+
+ if (QFPInstMap.count(MI->getOpcode()) &&
+ MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
+ MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
+ LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
+ if (optimizeQfp(MI, MBB)) {
+ MI->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "\t....Removing....");
+ Changed = true;
+ }
+ }
+ }
+ ++MBBI;
+ }
+ return Changed;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index f5d8b69..d9824a31 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() {
initializeHexagonPeepholePass(PR);
initializeHexagonSplitConst32AndConst64Pass(PR);
initializeHexagonVectorPrintPass(PR);
+ initializeHexagonQFPOptimizerPass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() {
addPass(createHexagonGenInsert());
if (EnableEarlyIf)
addPass(createHexagonEarlyIfConversion());
+ addPass(createHexagonQFPOptimizer());
}
return false;
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index ab93bba..b00589a 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -68,7 +68,7 @@ const llvm::StringRef RISCVSEWInstrument::DESC_NAME = "RISCV-SEW";
bool RISCVSEWInstrument::isDataValid(llvm::StringRef Data) {
  // Return true if one of the valid SEW strings
return StringSwitch<bool>(Data)
- .Cases("E8", "E16", "E32", "E64", true)
+ .Cases({"E8", "E16", "E32", "E64"}, true)
.Default(false);
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 169465e..0a53ba9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12649,10 +12649,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
// Reassemble the low and high pieces reversed.
- // FIXME: This is a CONCAT_VECTORS.
- SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
- return DAG.getInsertSubvector(DL, Res, Lo,
- LoVT.getVectorMinNumElements());
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);
}
// Just promote the int type to i16 which will double the LMUL.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 66717b9..7c89686 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1511,16 +1511,16 @@ def GIShiftMask32 :
GIComplexOperandMatcher<s64, "selectShiftMask32">,
GIComplexPatternEquiv<shiftMask32>;
-class shiftop<SDPatternOperator operator>
- : PatFrag<(ops node:$val, node:$count),
- (operator node:$val, (XLenVT (shiftMaskXLen node:$count)))>;
-class shiftopw<SDPatternOperator operator>
- : PatFrag<(ops node:$val, node:$count),
- (operator node:$val, (i64 (shiftMask32 node:$count)))>;
+class PatGprShiftMaskXLen<SDPatternOperator OpNode, RVInst Inst>
+ : Pat<(OpNode GPR:$rs1, shiftMaskXLen:$rs2),
+ (Inst GPR:$rs1, shiftMaskXLen:$rs2)>;
+class PatGprShiftMask32<SDPatternOperator OpNode, RVInst Inst>
+ : Pat<(OpNode GPR:$rs1, shiftMask32:$rs2),
+ (Inst GPR:$rs1, shiftMask32:$rs2)>;
-def : PatGprGpr<shiftop<shl>, SLL>;
-def : PatGprGpr<shiftop<srl>, SRL>;
-def : PatGprGpr<shiftop<sra>, SRA>;
+def : PatGprShiftMaskXLen<shl, SLL>;
+def : PatGprShiftMaskXLen<srl, SRL>;
+def : PatGprShiftMaskXLen<sra, SRA>;
// This is a special case of the ADD instruction used to facilitate the use of a
// fourth operand to emit a relocation on a symbol relating to this instruction.
@@ -2203,9 +2203,9 @@ def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
def : Pat<(i64 (sra (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt)),
(SRAIW GPR:$rs1, (ImmSub32 uimm6gt32:$shamt))>;
-def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>;
-def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>;
-def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>;
+def : PatGprShiftMask32<riscv_sllw, SLLW>;
+def : PatGprShiftMask32<riscv_srlw, SRLW>;
+def : PatGprShiftMask32<riscv_sraw, SRAW>;
// Select W instructions if only the lower 32 bits of the result are used.
def : PatGprGpr<binop_allwusers<add>, ADDW>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 57fbaa0..62b7bcd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -506,8 +506,8 @@ def : Pat<(XLenVT (xor GPR:$rs1, invLogicImm:$rs2)), (XNOR GPR:$rs1, invLogicImm
} // Predicates = [HasStdExtZbbOrZbkb]
let Predicates = [HasStdExtZbbOrZbkb] in {
-def : PatGprGpr<shiftop<rotl>, ROL>;
-def : PatGprGpr<shiftop<rotr>, ROR>;
+def : PatGprShiftMaskXLen<rotl, ROL>;
+def : PatGprShiftMaskXLen<rotr, ROR>;
def : PatGprImm<rotr, RORI, uimmlog2xlen>;
// There's no encoding for roli in the 'B' extension as it can be
@@ -517,29 +517,29 @@ def : Pat<(XLenVT (rotl GPR:$rs1, uimmlog2xlen:$shamt)),
} // Predicates = [HasStdExtZbbOrZbkb]
let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
-def : PatGprGpr<shiftopw<riscv_rolw>, ROLW>;
-def : PatGprGpr<shiftopw<riscv_rorw>, RORW>;
+def : PatGprShiftMask32<riscv_rolw, ROLW>;
+def : PatGprShiftMask32<riscv_rorw, RORW>;
def : PatGprImm<riscv_rorw, RORIW, uimm5>;
def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
(RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
let Predicates = [HasStdExtZbs] in {
-def : Pat<(XLenVT (and (not (shiftop<shl> 1, (XLenVT GPR:$rs2))), GPR:$rs1)),
- (BCLR GPR:$rs1, GPR:$rs2)>;
-def : Pat<(XLenVT (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
- (BCLR GPR:$rs1, GPR:$rs2)>;
-def : Pat<(XLenVT (or (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
- (BSET GPR:$rs1, GPR:$rs2)>;
-def : Pat<(XLenVT (xor (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
- (BINV GPR:$rs1, GPR:$rs2)>;
-def : Pat<(XLenVT (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)),
- (BEXT GPR:$rs1, GPR:$rs2)>;
-
-def : Pat<(XLenVT (shiftop<shl> 1, (XLenVT GPR:$rs2))),
- (BSET (XLenVT X0), GPR:$rs2)>;
-def : Pat<(XLenVT (not (shiftop<shl> -1, (XLenVT GPR:$rs2)))),
- (ADDI (XLenVT (BSET (XLenVT X0), GPR:$rs2)), -1)>;
+def : Pat<(XLenVT (and (not (shl 1, shiftMaskXLen:$rs2)), GPR:$rs1)),
+ (BCLR GPR:$rs1, shiftMaskXLen:$rs2)>;
+def : Pat<(XLenVT (and (rotl -2, shiftMaskXLen:$rs2), GPR:$rs1)),
+ (BCLR GPR:$rs1, shiftMaskXLen:$rs2)>;
+def : Pat<(XLenVT (or (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)),
+ (BSET GPR:$rs1, shiftMaskXLen:$rs2)>;
+def : Pat<(XLenVT (xor (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)),
+ (BINV GPR:$rs1, shiftMaskXLen:$rs2)>;
+def : Pat<(XLenVT (and (srl GPR:$rs1, shiftMaskXLen:$rs2), 1)),
+ (BEXT GPR:$rs1, shiftMaskXLen:$rs2)>;
+
+def : Pat<(XLenVT (shl 1, shiftMaskXLen:$rs2)),
+ (BSET (XLenVT X0), shiftMaskXLen:$rs2)>;
+def : Pat<(XLenVT (not (shl -1, shiftMaskXLen:$rs2))),
+ (ADDI (XLenVT (BSET (XLenVT X0), shiftMaskXLen:$rs2)), -1)>;
def : Pat<(XLenVT (and GPR:$rs1, BCLRMask:$mask)),
(BCLRI GPR:$rs1, BCLRMask:$mask)>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index dbe8e18..d91923b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -507,7 +507,9 @@ static Register buildLoadInst(SPIRVType *BaseType, Register PtrRegister,
static Register buildBuiltinVariableLoad(
MachineIRBuilder &MIRBuilder, SPIRVType *VariableType,
SPIRVGlobalRegistry *GR, SPIRV::BuiltIn::BuiltIn BuiltinValue, LLT LLType,
- Register Reg = Register(0), bool isConst = true, bool hasLinkageTy = true) {
+ Register Reg = Register(0), bool isConst = true,
+ const std::optional<SPIRV::LinkageType::LinkageType> &LinkageTy = {
+ SPIRV::LinkageType::Import}) {
Register NewRegister =
MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::pIDRegClass);
MIRBuilder.getMRI()->setType(
@@ -521,9 +523,8 @@ static Register buildBuiltinVariableLoad(
// Set up the global OpVariable with the necessary builtin decorations.
Register Variable = GR->buildGlobalVariable(
NewRegister, PtrType, getLinkStringForBuiltIn(BuiltinValue), nullptr,
- SPIRV::StorageClass::Input, nullptr, /* isConst= */ isConst,
- /* HasLinkageTy */ hasLinkageTy, SPIRV::LinkageType::Import, MIRBuilder,
- false);
+ SPIRV::StorageClass::Input, nullptr, /* isConst= */ isConst, LinkageTy,
+ MIRBuilder, false);
// Load the value from the global variable.
Register LoadedRegister =
@@ -1851,7 +1852,7 @@ static bool generateWaveInst(const SPIRV::IncomingCall *Call,
return buildBuiltinVariableLoad(
MIRBuilder, Call->ReturnType, GR, Value, LLType, Call->ReturnRegister,
- /* isConst= */ false, /* hasLinkageTy= */ false);
+ /* isConst= */ false, /* LinkageType= */ std::nullopt);
}
// We expect a builtin
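The reworked default argument keeps the historical behavior for existing callers (decorate with Import linkage), while std::nullopt, as passed by generateWaveInst above, now means the LinkageAttributes decoration is omitted entirely. A minimal standalone sketch of the defaulted-optional idiom (names are illustrative, not the SPIRV API):

    #include <iostream>
    #include <optional>

    enum class LinkageType { Import, Export };

    // Callers that pass nothing keep the old Import behavior;
    // std::nullopt expresses "emit no linkage decoration at all".
    void buildVariable(std::optional<LinkageType> Linkage = LinkageType::Import) {
      if (Linkage)
        std::cout << "decorate, linkage=" << static_cast<int>(*Linkage) << '\n';
      else
        std::cout << "no linkage decoration\n";
    }

    int main() {
      buildVariable();             // Import by default
      buildVariable(std::nullopt); // decoration suppressed
    }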
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 1a7c02c..9e11c3a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -479,19 +479,9 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
.addImm(static_cast<uint32_t>(getExecutionModel(*ST, F)))
.addUse(FuncVReg);
addStringImm(F.getName(), MIB);
- } else if (F.getLinkage() != GlobalValue::InternalLinkage &&
- F.getLinkage() != GlobalValue::PrivateLinkage &&
- F.getVisibility() != GlobalValue::HiddenVisibility) {
- SPIRV::LinkageType::LinkageType LnkTy =
- F.isDeclaration()
- ? SPIRV::LinkageType::Import
- : (F.getLinkage() == GlobalValue::LinkOnceODRLinkage &&
- ST->canUseExtension(
- SPIRV::Extension::SPV_KHR_linkonce_odr)
- ? SPIRV::LinkageType::LinkOnceODR
- : SPIRV::LinkageType::Export);
+ } else if (const auto LnkTy = getSpirvLinkageTypeFor(*ST, F)) {
buildOpDecorate(FuncVReg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
- {static_cast<uint32_t>(LnkTy)}, F.getName());
+ {static_cast<uint32_t>(*LnkTy)}, F.getName());
}
// Handle function pointers decoration
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 6fd1c7e..6181abb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -712,9 +712,9 @@ SPIRVGlobalRegistry::buildConstantSampler(Register ResReg, unsigned AddrMode,
Register SPIRVGlobalRegistry::buildGlobalVariable(
Register ResVReg, SPIRVType *BaseType, StringRef Name,
const GlobalValue *GV, SPIRV::StorageClass::StorageClass Storage,
- const MachineInstr *Init, bool IsConst, bool HasLinkageTy,
- SPIRV::LinkageType::LinkageType LinkageType, MachineIRBuilder &MIRBuilder,
- bool IsInstSelector) {
+ const MachineInstr *Init, bool IsConst,
+ const std::optional<SPIRV::LinkageType::LinkageType> &LinkageType,
+ MachineIRBuilder &MIRBuilder, bool IsInstSelector) {
const GlobalVariable *GVar = nullptr;
if (GV) {
GVar = cast<const GlobalVariable>(GV);
@@ -792,9 +792,9 @@ Register SPIRVGlobalRegistry::buildGlobalVariable(
buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::Alignment, {Alignment});
}
- if (HasLinkageTy)
+ if (LinkageType)
buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
- {static_cast<uint32_t>(LinkageType)}, Name);
+ {static_cast<uint32_t>(*LinkageType)}, Name);
SPIRV::BuiltIn::BuiltIn BuiltInId;
if (getSpirvBuiltInIdByName(Name, BuiltInId))
@@ -821,8 +821,8 @@ Register SPIRVGlobalRegistry::getOrCreateGlobalVariableWithBinding(
MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::iIDRegClass);
buildGlobalVariable(VarReg, VarType, Name, nullptr,
- getPointerStorageClass(VarType), nullptr, false, false,
- SPIRV::LinkageType::Import, MIRBuilder, false);
+ getPointerStorageClass(VarType), nullptr, false,
+ std::nullopt, MIRBuilder, false);
buildOpDecorate(VarReg, MIRBuilder, SPIRV::Decoration::DescriptorSet, {Set});
buildOpDecorate(VarReg, MIRBuilder, SPIRV::Decoration::Binding, {Binding});
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index a648def..c230e62 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -548,14 +548,12 @@ public:
MachineIRBuilder &MIRBuilder);
Register getOrCreateUndef(MachineInstr &I, SPIRVType *SpvType,
const SPIRVInstrInfo &TII);
- Register buildGlobalVariable(Register Reg, SPIRVType *BaseType,
- StringRef Name, const GlobalValue *GV,
- SPIRV::StorageClass::StorageClass Storage,
- const MachineInstr *Init, bool IsConst,
- bool HasLinkageTy,
- SPIRV::LinkageType::LinkageType LinkageType,
- MachineIRBuilder &MIRBuilder,
- bool IsInstSelector);
+ Register buildGlobalVariable(
+ Register Reg, SPIRVType *BaseType, StringRef Name, const GlobalValue *GV,
+ SPIRV::StorageClass::StorageClass Storage, const MachineInstr *Init,
+ bool IsConst,
+ const std::optional<SPIRV::LinkageType::LinkageType> &LinkageType,
+ MachineIRBuilder &MIRBuilder, bool IsInstSelector);
Register getOrCreateGlobalVariableWithBinding(const SPIRVType *VarType,
uint32_t Set, uint32_t Binding,
StringRef Name,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index a0cff4d..5591d9f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -4350,15 +4350,8 @@ bool SPIRVInstructionSelector::selectGlobalValue(
if (hasInitializer(GlobalVar) && !Init)
return true;
- bool HasLnkTy = !GV->hasInternalLinkage() && !GV->hasPrivateLinkage() &&
- !GV->hasHiddenVisibility();
- SPIRV::LinkageType::LinkageType LnkType =
- GV->isDeclarationForLinker()
- ? SPIRV::LinkageType::Import
- : (GV->hasLinkOnceODRLinkage() &&
- STI.canUseExtension(SPIRV::Extension::SPV_KHR_linkonce_odr)
- ? SPIRV::LinkageType::LinkOnceODR
- : SPIRV::LinkageType::Export);
+ const std::optional<SPIRV::LinkageType::LinkageType> LnkType =
+ getSpirvLinkageTypeFor(STI, *GV);
const unsigned AddrSpace = GV->getAddressSpace();
SPIRV::StorageClass::StorageClass StorageClass =
@@ -4366,7 +4359,7 @@ bool SPIRVInstructionSelector::selectGlobalValue(
SPIRVType *ResType = GR.getOrCreateSPIRVPointerType(GVType, I, StorageClass);
Register Reg = GR.buildGlobalVariable(
ResVReg, ResType, GlobalIdent, GV, StorageClass, Init,
- GlobalVar->isConstant(), HasLnkTy, LnkType, MIRBuilder, true);
+ GlobalVar->isConstant(), LnkType, MIRBuilder, true);
return Reg.isValid();
}
@@ -4517,8 +4510,8 @@ bool SPIRVInstructionSelector::loadVec3BuiltinInputID(
// builtin variable.
Register Variable = GR.buildGlobalVariable(
NewRegister, PtrType, getLinkStringForBuiltIn(BuiltInValue), nullptr,
- SPIRV::StorageClass::Input, nullptr, true, false,
- SPIRV::LinkageType::Import, MIRBuilder, false);
+ SPIRV::StorageClass::Input, nullptr, true, std::nullopt, MIRBuilder,
+ false);
// Create new register for loading value.
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
@@ -4570,8 +4563,8 @@ bool SPIRVInstructionSelector::loadBuiltinInputID(
// builtin variable.
Register Variable = GR.buildGlobalVariable(
NewRegister, PtrType, getLinkStringForBuiltIn(BuiltInValue), nullptr,
- SPIRV::StorageClass::Input, nullptr, true, false,
- SPIRV::LinkageType::Import, MIRBuilder, false);
+ SPIRV::StorageClass::Input, nullptr, true, std::nullopt, MIRBuilder,
+ false);
// Load uint value from the global variable.
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 61a0bbe..f7cdfcb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -547,9 +547,9 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI,
if (MI.getOpcode() == SPIRV::OpDecorate) {
    // If it has Import linkage.
auto Dec = MI.getOperand(1).getImm();
- if (Dec == static_cast<unsigned>(SPIRV::Decoration::LinkageAttributes)) {
+ if (Dec == SPIRV::Decoration::LinkageAttributes) {
auto Lnk = MI.getOperand(MI.getNumOperands() - 1).getImm();
- if (Lnk == static_cast<unsigned>(SPIRV::LinkageType::Import)) {
+ if (Lnk == SPIRV::LinkageType::Import) {
// Map imported function name to function ID register.
const Function *ImportedFunc =
F->getParent()->getFunction(getStringImm(MI, 2));
@@ -635,7 +635,7 @@ static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI,
void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) {
InstrTraces IS;
for (auto F = M.begin(), E = M.end(); F != E; ++F) {
- if ((*F).isDeclaration())
+ if (F->isDeclaration())
continue;
MachineFunction *MF = MMI->getMachineFunction(*F);
assert(MF);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index d8376cd..2d19f6de 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -169,9 +169,7 @@ struct ModuleAnalysisInfo {
MCRegister getFuncReg(const Function *F) {
assert(F && "Function is null");
- auto FuncPtrRegPair = FuncMap.find(F);
- return FuncPtrRegPair == FuncMap.end() ? MCRegister()
- : FuncPtrRegPair->second;
+ return FuncMap.lookup(F);
}
MCRegister getExtInstSetReg(unsigned SetNum) { return ExtInstSetMap[SetNum]; }
InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; }
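getFuncReg now relies on DenseMap::lookup, which returns a value-initialized mapped value (here a default, invalid MCRegister) when the key is absent — exactly what the removed find/compare produced. A minimal sketch, assuming LLVM's ADT headers:

    #include "llvm/ADT/DenseMap.h"

    unsigned demo(const llvm::DenseMap<int, unsigned> &M) {
      // Absent key => value-initialized result (0u here), so the explicit
      // find()/end() comparison collapses into a single call.
      return M.lookup(42);
    }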
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 1d47c89..4e2cc88 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -1040,4 +1040,19 @@ getFirstValidInstructionInsertPoint(MachineBasicBlock &BB) {
: VarPos;
}
+std::optional<SPIRV::LinkageType::LinkageType>
+getSpirvLinkageTypeFor(const SPIRVSubtarget &ST, const GlobalValue &GV) {
+ if (GV.hasLocalLinkage() || GV.hasHiddenVisibility())
+ return std::nullopt;
+
+ if (GV.isDeclarationForLinker())
+ return SPIRV::LinkageType::Import;
+
+ if (GV.hasLinkOnceODRLinkage() &&
+ ST.canUseExtension(SPIRV::Extension::SPV_KHR_linkonce_odr))
+ return SPIRV::LinkageType::LinkOnceODR;
+
+ return SPIRV::LinkageType::Export;
+}
+
} // namespace llvm
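getSpirvLinkageTypeFor centralizes the linkage policy previously duplicated in SPIRVCallLowering and SPIRVInstructionSelector: local or hidden symbols yield std::nullopt (no decoration), declarations import, link-once-ODR applies when the extension is available, and everything else exports. Call sites test the optional with a C++17 init-statement, as in the SPIRVCallLowering hunk earlier. A standalone sketch of that call-site shape (stand-in names, not the real API):

    #include <iostream>
    #include <optional>

    // Stand-in for getSpirvLinkageTypeFor: local symbols get no linkage.
    static std::optional<int> linkageFor(bool IsLocal) {
      if (IsLocal)
        return std::nullopt;
      return 2; // e.g. Export
    }

    int main() {
      // Decorate only when a linkage type applies.
      if (const auto LnkTy = linkageFor(/*IsLocal=*/false))
        std::cout << "LinkageAttributes " << *LnkTy << '\n';
    }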
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 5777a24..99d9d40 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -559,5 +559,8 @@ unsigned getArrayComponentCount(const MachineRegisterInfo *MRI,
const MachineInstr *ResType);
MachineBasicBlock::iterator
getFirstValidInstructionInsertPoint(MachineBasicBlock &BB);
+
+std::optional<SPIRV::LinkageType::LinkageType>
+getSpirvLinkageTypeFor(const SPIRVSubtarget &ST, const GlobalValue &GV);
} // namespace llvm
#endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 8e08d16..a1fd366 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1164,7 +1164,6 @@ def ProcessorFeatures {
FeatureAVXNECONVERT,
FeatureAVXVNNIINT8,
FeatureAVXVNNIINT16,
- FeatureUSERMSR,
FeatureSHA512,
FeatureSM3,
FeatureEGPR,
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index 08944e6..7882045 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -235,16 +235,16 @@ ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(ARM::FPUKind FPUKind) {
StringRef ARM::getFPUSynonym(StringRef FPU) {
return StringSwitch<StringRef>(FPU)
- .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
+ .Cases({"fpa", "fpe2", "fpe3", "maverick"}, "invalid") // Unsupported
.Case("vfp2", "vfpv2")
.Case("vfp3", "vfpv3")
.Case("vfp4", "vfpv4")
.Case("vfp3-d16", "vfpv3-d16")
.Case("vfp4-d16", "vfpv4-d16")
- .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
- .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
+ .Cases({"fp4-sp-d16", "vfpv4-sp-d16"}, "fpv4-sp-d16")
+ .Cases({"fp4-dp-d16", "fpv4-dp-d16"}, "vfpv4-d16")
.Case("fp5-sp-d16", "fpv5-sp-d16")
- .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
+ .Cases({"fp5-dp-d16", "fpv5-dp-d16"}, "fpv5-d16")
// FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
.Case("neon-vfpv3", "neon")
.Default(FPU);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index dd13ce3..b13c795 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -143,8 +143,7 @@ constexpr FeatureBitset FeaturesDiamondRapids =
FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 |
FeaturePPX | FeatureNDD | FeatureNF | FeatureMOVRS | FeatureAMX_MOVRS |
- FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 |
- FeatureAMX_TRANSPOSE | FeatureUSERMSR;
+ FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 | FeatureAMX_TRANSPOSE;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index b9534def..a06f832 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -430,6 +430,7 @@ public:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::PtrToInt:
+ case Instruction::PtrToAddr:
case Instruction::IntToPtr:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index fa66a03..23e1243 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -227,6 +227,7 @@ static InstructionCost ComputeSpeculationCost(const Instruction *I,
case Instruction::Call:
case Instruction::BitCast:
case Instruction::PtrToInt:
+ case Instruction::PtrToAddr:
case Instruction::IntToPtr:
case Instruction::AddrSpaceCast:
case Instruction::FPToUI:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index febdc54..1cc9173 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1011,6 +1011,10 @@ public:
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
/// for vectorization factor \p VF.
bool canTruncateToMinimalBitwidth(Instruction *I, ElementCount VF) const {
+ // Truncs must truncate at most to their destination type.
+ if (isa_and_nonnull<TruncInst>(I) && MinBWs.contains(I) &&
+ I->getType()->getScalarSizeInBits() < MinBWs.lookup(I))
+ return false;
return VF.isVector() && MinBWs.contains(I) &&
!isProfitableToScalarize(I, VF) &&
!isScalarAfterVectorization(I, VF);
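The added guard covers truncs whose recorded minimal bitwidth is wider than their own destination type; "narrowing" such a trunc would actually have to widen it, so it is rejected. A self-contained sketch of the size check with illustrative numbers (not LLVM API):

    #include <cassert>

    // A trunc to i8 recorded with MinBW = 16 cannot be rewritten at the
    // minimal bitwidth: 16 bits exceeds the 8-bit destination.
    static bool canTruncate(unsigned DestScalarBits, unsigned MinBW) {
      return DestScalarBits >= MinBW;
    }

    int main() {
      assert(!canTruncate(/*DestScalarBits=*/8, /*MinBW=*/16));  // rejected
      assert(canTruncate(/*DestScalarBits=*/32, /*MinBW=*/16)); // allowed
    }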
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 048a3e6..3f18bd7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10546,8 +10546,11 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
PoisonValue::get(UniqueValues.front()->getType()));
   // Check that operations extended with poisons/copyable elements are
   // still valid for vectorization (div/rem are not allowed).
- if (!S.areInstructionsWithCopyableElements() &&
- !getSameOpcode(PaddedUniqueValues, TLI).valid()) {
+ if ((!S.areInstructionsWithCopyableElements() &&
+ !getSameOpcode(PaddedUniqueValues, TLI).valid()) ||
+ (S.areInstructionsWithCopyableElements() && S.isMulDivLikeOp() &&
+ (S.getMainOp()->isIntDivRem() || S.getMainOp()->isFPDivRem() ||
+ isa<CallInst>(S.getMainOp())))) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
ReuseShuffleIndices.clear();
return false;
diff --git a/llvm/lib/WindowsDriver/MSVCPaths.cpp b/llvm/lib/WindowsDriver/MSVCPaths.cpp
index 1fc8974..09468da 100644
--- a/llvm/lib/WindowsDriver/MSVCPaths.cpp
+++ b/llvm/lib/WindowsDriver/MSVCPaths.cpp
@@ -259,9 +259,7 @@ static bool getSystemRegistryString(const char *keyPath, const char *valueName,
#endif // _WIN32
}
-namespace llvm {
-
-const char *archToWindowsSDKArch(Triple::ArchType Arch) {
+const char *llvm::archToWindowsSDKArch(Triple::ArchType Arch) {
switch (Arch) {
case Triple::ArchType::x86:
return "x86";
@@ -277,7 +275,7 @@ const char *archToWindowsSDKArch(Triple::ArchType Arch) {
}
}
-const char *archToLegacyVCArch(Triple::ArchType Arch) {
+const char *llvm::archToLegacyVCArch(Triple::ArchType Arch) {
switch (Arch) {
case Triple::ArchType::x86:
// x86 is default in legacy VC toolchains.
@@ -295,7 +293,7 @@ const char *archToLegacyVCArch(Triple::ArchType Arch) {
}
}
-const char *archToDevDivInternalArch(Triple::ArchType Arch) {
+const char *llvm::archToDevDivInternalArch(Triple::ArchType Arch) {
switch (Arch) {
case Triple::ArchType::x86:
return "i386";
@@ -311,8 +309,9 @@ const char *archToDevDivInternalArch(Triple::ArchType Arch) {
}
}
-bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath,
- Triple::ArchType Arch, std::string &path) {
+bool llvm::appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath,
+ Triple::ArchType Arch,
+ std::string &path) {
if (SDKMajor >= 8) {
sys::path::append(LibPath, archToWindowsSDKArch(Arch));
} else {
@@ -336,10 +335,11 @@ bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath,
return true;
}
-std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout,
- const std::string &VCToolChainPath,
- Triple::ArchType TargetArch,
- StringRef SubdirParent) {
+std::string llvm::getSubDirectoryPath(SubDirectoryType Type,
+ ToolsetLayout VSLayout,
+ const std::string &VCToolChainPath,
+ Triple::ArchType TargetArch,
+ StringRef SubdirParent) {
const char *SubdirName;
const char *IncludeName;
switch (VSLayout) {
@@ -390,19 +390,22 @@ std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout,
return std::string(Path);
}
-bool useUniversalCRT(ToolsetLayout VSLayout, const std::string &VCToolChainPath,
- Triple::ArchType TargetArch, vfs::FileSystem &VFS) {
+bool llvm::useUniversalCRT(ToolsetLayout VSLayout,
+ const std::string &VCToolChainPath,
+ Triple::ArchType TargetArch, vfs::FileSystem &VFS) {
SmallString<128> TestPath(getSubDirectoryPath(
SubDirectoryType::Include, VSLayout, VCToolChainPath, TargetArch));
sys::path::append(TestPath, "stdlib.h");
return !VFS.exists(TestPath);
}
-bool getWindowsSDKDir(vfs::FileSystem &VFS, std::optional<StringRef> WinSdkDir,
- std::optional<StringRef> WinSdkVersion,
- std::optional<StringRef> WinSysRoot, std::string &Path,
- int &Major, std::string &WindowsSDKIncludeVersion,
- std::string &WindowsSDKLibVersion) {
+bool llvm::getWindowsSDKDir(vfs::FileSystem &VFS,
+ std::optional<StringRef> WinSdkDir,
+ std::optional<StringRef> WinSdkVersion,
+ std::optional<StringRef> WinSysRoot,
+ std::string &Path, int &Major,
+ std::string &WindowsSDKIncludeVersion,
+ std::string &WindowsSDKLibVersion) {
// Trust /winsdkdir and /winsdkversion if present.
if (getWindowsSDKDirViaCommandLine(VFS, WinSdkDir, WinSdkVersion, WinSysRoot,
Path, Major, WindowsSDKIncludeVersion)) {
@@ -460,11 +463,11 @@ bool getWindowsSDKDir(vfs::FileSystem &VFS, std::optional<StringRef> WinSdkDir,
return false;
}
-bool getUniversalCRTSdkDir(vfs::FileSystem &VFS,
- std::optional<StringRef> WinSdkDir,
- std::optional<StringRef> WinSdkVersion,
- std::optional<StringRef> WinSysRoot,
- std::string &Path, std::string &UCRTVersion) {
+bool llvm::getUniversalCRTSdkDir(vfs::FileSystem &VFS,
+ std::optional<StringRef> WinSdkDir,
+ std::optional<StringRef> WinSdkVersion,
+ std::optional<StringRef> WinSysRoot,
+ std::string &Path, std::string &UCRTVersion) {
// If /winsdkdir is passed, use it as location for the UCRT too.
// FIXME: Should there be a dedicated /ucrtdir to override /winsdkdir?
int Major;
@@ -491,11 +494,11 @@ bool getUniversalCRTSdkDir(vfs::FileSystem &VFS,
return getWindows10SDKVersionFromPath(VFS, Path, UCRTVersion);
}
-bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS,
- std::optional<StringRef> VCToolsDir,
- std::optional<StringRef> VCToolsVersion,
- std::optional<StringRef> WinSysRoot,
- std::string &Path, ToolsetLayout &VSLayout) {
+bool llvm::findVCToolChainViaCommandLine(
+ vfs::FileSystem &VFS, std::optional<StringRef> VCToolsDir,
+ std::optional<StringRef> VCToolsVersion,
+ std::optional<StringRef> WinSysRoot, std::string &Path,
+ ToolsetLayout &VSLayout) {
// Don't validate the input; trust the value supplied by the user.
// The primary motivation is to prevent unnecessary file and registry access.
if (VCToolsDir || WinSysRoot) {
@@ -518,8 +521,9 @@ bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS,
return false;
}
-bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
- ToolsetLayout &VSLayout) {
+bool llvm::findVCToolChainViaEnvironment(vfs::FileSystem &VFS,
+ std::string &Path,
+ ToolsetLayout &VSLayout) {
// These variables are typically set by vcvarsall.bat
// when launching a developer command prompt.
if (std::optional<std::string> VCToolsInstallDir =
@@ -627,9 +631,9 @@ bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
return false;
}
-bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS,
- std::optional<StringRef> VCToolsVersion,
- std::string &Path, ToolsetLayout &VSLayout) {
+bool llvm::findVCToolChainViaSetupConfig(
+ vfs::FileSystem &VFS, std::optional<StringRef> VCToolsVersion,
+ std::string &Path, ToolsetLayout &VSLayout) {
#if !defined(USE_MSVC_SETUP_API)
return false;
#else
@@ -724,7 +728,8 @@ bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS,
#endif
}
-bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout) {
+bool llvm::findVCToolChainViaRegistry(std::string &Path,
+ ToolsetLayout &VSLayout) {
std::string VSInstallPath;
if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)",
"InstallDir", VSInstallPath, nullptr) ||
@@ -744,5 +749,3 @@ bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout) {
}
return false;
}
-
-} // namespace llvm
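The MSVCPaths change is mechanical: out-of-line definitions move from a namespace llvm { ... } block to llvm::-qualified names. The qualified form requires a matching prior declaration, so signature drift becomes a compile error instead of silently introducing a new overload inside the block. A minimal single-file sketch of the pattern (signature simplified for illustration):

    namespace llvm {
    const char *archToWindowsSDKArch(int Arch); // declaration (as in a header)
    } // namespace llvm

    // Qualified definition: must match an existing declaration exactly.
    const char *llvm::archToWindowsSDKArch(int Arch) {
      return Arch == 0 ? "x86" : "arm64"; // illustrative body only
    }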