Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/HashRecognize.cpp | 2
-rw-r--r--  llvm/lib/Analysis/LoopAccessAnalysis.cpp | 32
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 26
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MachineRegisterInfo.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 39
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 15
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 25
-rw-r--r--  llvm/lib/IR/Assumptions.cpp | 12
-rw-r--r--  llvm/lib/IR/DebugInfoMetadata.cpp | 3
-rw-r--r--  llvm/lib/IR/DiagnosticHandler.cpp | 2
-rw-r--r--  llvm/lib/IR/ModuleSummaryIndex.cpp | 2
-rw-r--r--  llvm/lib/IR/PassInstrumentation.cpp | 7
-rw-r--r--  llvm/lib/IR/ProfDataUtils.cpp | 99
-rw-r--r--  llvm/lib/IR/SafepointIRVerifier.cpp | 2
-rw-r--r--  llvm/lib/IR/VFABIDemangler.cpp | 4
-rw-r--r--  llvm/lib/IR/Value.cpp | 2
-rw-r--r--  llvm/lib/Support/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/Support/Jobserver.cpp | 259
-rw-r--r--  llvm/lib/Support/Parallel.cpp | 98
-rw-r--r--  llvm/lib/Support/ThreadPool.cpp | 108
-rw-r--r--  llvm/lib/Support/Threading.cpp | 5
-rw-r--r--  llvm/lib/Support/Unix/Jobserver.inc | 195
-rw-r--r--  llvm/lib/Support/Windows/Jobserver.inc | 79
-rw-r--r--  llvm/lib/TableGen/Error.cpp | 58
-rw-r--r--  llvm/lib/TableGen/Main.cpp | 4
-rw-r--r--  llvm/lib/TableGen/Record.cpp | 6
-rw-r--r--  llvm/lib/TableGen/TGParser.cpp | 6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td | 104
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 30
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 4
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/SMEInstrFormats.td | 12
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 106
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 7
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 25
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 51
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 19
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 9
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGISel.td | 12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 16
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp | 25
-rw-r--r--  llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 4
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 18
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 178
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 7
68 files changed, 1374 insertions, 487 deletions
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index 5d7ee1f..4529123 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -97,7 +97,7 @@ static bool containsUnreachable(const Loop &L,
}
}
}
- return std::distance(Latch->begin(), Latch->end()) != Visited.size();
+ return Latch->size() != Visited.size();
}
/// A structure that can hold either a Simple Recurrence or a Conditional
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 47dccde..7adb25d 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -233,19 +233,25 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
  // Check if we have a suitable dereferenceable assumption we can use.
- if (!StartPtrV->canBeFreed()) {
- Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
- if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
- if (isa<BranchInst>(LoopPred->getTerminator()))
- CtxI = LoopPred->getTerminator();
- }
-
- RetainedKnowledge DerefRK = getKnowledgeValidInContext(
- StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
- if (DerefRK) {
- DerefBytesSCEV =
- SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue));
- }
+ Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
+ if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
+ if (isa<BranchInst>(LoopPred->getTerminator()))
+ CtxI = LoopPred->getTerminator();
+ }
+ RetainedKnowledge DerefRK;
+ getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC,
+ [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+ if (!isValidAssumeForContext(Assume, CtxI, DT))
+ return false;
+ if (StartPtrV->canBeFreed() &&
+ !willNotFreeBetween(Assume, CtxI))
+ return false;
+ DerefRK = std::max(DerefRK, RK);
+ return true;
+ });
+ if (DerefRK) {
+ DerefBytesSCEV =
+ SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue));
}
if (DerefBytesSCEV->isZero())
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 09a8fbe..1eda7a7 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -89,6 +89,9 @@ using namespace llvm::PatternMatch;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+/// Maximum number of instructions to check between assume and context
+/// instruction.
+static constexpr unsigned MaxInstrsToCheckForFree = 16;
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
@@ -561,6 +564,29 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
return false;
}
+bool llvm::willNotFreeBetween(const Instruction *Assume,
+ const Instruction *CtxI) {
+ if (CtxI->getParent() != Assume->getParent() || !Assume->comesBefore(CtxI))
+ return false;
+ // Make sure the current function cannot arrange for another thread to free on
+ // its behalf.
+ if (!CtxI->getFunction()->hasNoSync())
+ return false;
+
+ // Check if there are any calls between the assume and CtxI that may
+ // free memory.
+ for (const auto &[Idx, I] :
+ enumerate(make_range(Assume->getIterator(), CtxI->getIterator()))) {
+ // Limit number of instructions to walk.
+ if (Idx > MaxInstrsToCheckForFree)
+ return false;
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ if (!CB->hasFnAttr(Attribute::NoFree))
+ return false;
+ }
+ return true;
+}
+
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
// we still have enough information about `RHS` to conclude non-zero. For
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 1703b27..bc0bb34 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -618,12 +618,15 @@ bool DwarfExpression::addExpression(
case dwarf::DW_OP_dup:
case dwarf::DW_OP_push_object_address:
case dwarf::DW_OP_over:
+ case dwarf::DW_OP_rot:
case dwarf::DW_OP_eq:
case dwarf::DW_OP_ne:
case dwarf::DW_OP_gt:
case dwarf::DW_OP_ge:
case dwarf::DW_OP_lt:
case dwarf::DW_OP_le:
+ case dwarf::DW_OP_neg:
+ case dwarf::DW_OP_abs:
emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index abb3f3e..ae284f3 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -83,8 +83,6 @@ constrainRegClass(MachineRegisterInfo &MRI, Register Reg,
const TargetRegisterClass *MachineRegisterInfo::constrainRegClass(
Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) {
- if (Reg.isPhysical())
- return nullptr;
return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 558c5a0..309f1be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6046,7 +6046,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
return N02;
}
- if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
+ if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
BW = MinCPlus1.exactLogBase2();
Unsigned = true;
return N02;
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 11bc64c..bb10cf6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -160,7 +160,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
// If all uses are reading from the src physical register and copying the
// register is either impossible or very expensive, then don't create a copy.
- if (MatchReg && SrcRC->getCopyCost() < 0) {
+ if (MatchReg && SrcRC->expensiveOrImpossibleToCopy()) {
VRBase = SrcReg;
} else {
// Create the reg, emit the copy.
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 31e7855..4f4fb9c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -111,15 +111,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
- const TargetLowering &TLI,
MCRegister &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return;
Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost))
- return;
-
if (Reg.isVirtual())
return;
@@ -136,7 +132,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
if (PhysReg) {
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
- Cost = RC->getCopyCost();
+ Cost = RC->expensiveOrImpossibleToCopy() ? -1 : RC->getCopyCost();
}
}
@@ -490,8 +486,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
MCRegister PhysReg;
int Cost = 1;
// Determine if this is a physical register dependency.
- const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost);
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
assert((!PhysReg || !isChain) && "Chain dependence via physreg data?");
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
// emits a copy from the physical register to a virtual register unless
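The two hunks above (and the InstrEmitter change earlier) replace the raw copy-cost check with the new TargetRegisterClass::expensiveOrImpossibleToCopy() predicate. Its definition is not part of this diff; the sketch below only restates what the call sites imply (InstrEmitter previously tested getCopyCost() < 0, and ScheduleDAGSDNodes now maps the predicate back to a cost of -1), so treat it as an assumption rather than the actual implementation.

#include "llvm/CodeGen/TargetRegisterInfo.h"

// Sketch only: a negative copy cost is the existing convention for "copying
// this register class is impossible or prohibitively expensive".
static bool expensiveOrImpossibleToCopySketch(const llvm::TargetRegisterClass &RC) {
  return RC.getCopyCost() < 0;
}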
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 95f53fe..6ea2e27 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12698,6 +12698,45 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
return DAGSize;
}
+void SelectionDAG::getTopologicallyOrderedNodes(
+ SmallVectorImpl<const SDNode *> &SortedNodes) const {
+ SortedNodes.clear();
+ // Node -> remaining number of outstanding operands.
+ DenseMap<const SDNode *, unsigned> RemainingOperands;
+
+ // Put nodes without any operands into SortedNodes first.
+ for (const SDNode &N : allnodes()) {
+ checkForCycles(&N, this);
+ unsigned NumOperands = N.getNumOperands();
+ if (NumOperands == 0)
+ SortedNodes.push_back(&N);
+ else
+ // Record their total number of outstanding operands.
+ RemainingOperands[&N] = NumOperands;
+ }
+
+ // A node is pushed into SortedNodes when all of its operands (predecessors in
+ // the graph) are also in SortedNodes.
+ for (unsigned i = 0U; i < SortedNodes.size(); ++i) {
+ const SDNode *N = SortedNodes[i];
+ for (const SDNode *U : N->users()) {
+ unsigned &NumRemOperands = RemainingOperands[U];
+ assert(NumRemOperands && "Invalid number of remaining operands");
+ --NumRemOperands;
+ if (!NumRemOperands)
+ SortedNodes.push_back(U);
+ }
+ }
+
+ assert(SortedNodes.size() == AllNodes.size() && "Node count mismatch");
+ assert(SortedNodes.front()->getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token");
+ assert(SortedNodes.front()->getNumOperands() == 0 &&
+ "First node in topological sort has operands");
+ assert(SortedNodes.back()->use_empty() &&
+ "Last node in topologic sort has users");
+}
+
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) {
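getTopologicallyOrderedNodes() above is a Kahn-style topological sort: seed the output with operand-less nodes, then append each user once its outstanding-operand count drops to zero. The same counting scheme on a plain adjacency list, as a self-contained illustration (the container types and names here are the example's own, not SelectionDAG's):

#include <cstddef>
#include <vector>

// Kahn's algorithm: Users[N] lists the nodes that consume N's value,
// NumOperands[N] is N's operand count (taken by value so it can be consumed).
std::vector<size_t> topoOrder(const std::vector<std::vector<size_t>> &Users,
                              std::vector<unsigned> NumOperands) {
  std::vector<size_t> Order;
  // Seed with nodes that have no operands (no incoming edges).
  for (size_t N = 0; N < NumOperands.size(); ++N)
    if (NumOperands[N] == 0)
      Order.push_back(N);
  // A node is appended once all of its operands have been appended.
  for (size_t I = 0; I < Order.size(); ++I)
    for (size_t U : Users[Order[I]])
      if (--NumOperands[U] == 0)
        Order.push_back(U);
  return Order; // Shorter than the node count if the graph has a cycle.
}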
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 4b2a00c..fcfbfe6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1061,13 +1061,24 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
N->dump(G);
}
-LLVM_DUMP_METHOD void SelectionDAG::dump() const {
+LLVM_DUMP_METHOD void SelectionDAG::dump(bool Sorted) const {
dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
- for (const SDNode &N : allnodes()) {
+ auto dumpEachNode = [this](const SDNode &N) {
if (!N.hasOneUse() && &N != getRoot().getNode() &&
(!shouldPrintInline(N, this) || N.use_empty()))
DumpNodes(&N, 2, this);
+ };
+
+ if (Sorted) {
+ SmallVector<const SDNode *> SortedNodes;
+ SortedNodes.reserve(AllNodes.size());
+ getTopologicallyOrderedNodes(SortedNodes);
+ for (const SDNode *N : SortedNodes)
+ dumpEachNode(*N);
+ } else {
+ for (const SDNode &N : allnodes())
+ dumpEachNode(N);
}
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index e61558c..c35f29d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -144,6 +144,11 @@ UseMBPI("use-mbpi",
cl::init(true), cl::Hidden);
#ifndef NDEBUG
+static cl::opt<bool>
+ DumpSortedDAG("dump-sorted-dags", cl::Hidden,
+ cl::desc("Print DAGs with sorted nodes in debug dump"),
+ cl::init(false));
+
static cl::opt<std::string>
FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
cl::desc("Only display the basic block whose name "
@@ -932,7 +937,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nInitial selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -952,7 +957,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -974,7 +979,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -998,7 +1003,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1016,7 +1021,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1032,7 +1037,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nVector/type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1052,7 +1057,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1072,7 +1077,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nLegalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1092,7 +1097,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1116,7 +1121,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nSelected selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
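The flag added above only changes how the existing ISEL_DUMP output is ordered, so the sorted form is opt-in. In an assertions-enabled build it would be exercised with something like "llc -debug-only=isel -dump-sorted-dags input.ll", where the input file name is made up and -debug-only=isel assumes this file's usual DEBUG_TYPE; the per-phase dumps then list nodes in the order produced by getTopologicallyOrderedNodes() rather than allnodes() order.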
diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp
index f8bbcb3..3397f0e 100644
--- a/llvm/lib/IR/Assumptions.cpp
+++ b/llvm/lib/IR/Assumptions.cpp
@@ -20,9 +20,8 @@
using namespace llvm;
-namespace {
-bool hasAssumption(const Attribute &A,
- const KnownAssumptionString &AssumptionStr) {
+static bool hasAssumption(const Attribute &A,
+ const KnownAssumptionString &AssumptionStr) {
if (!A.isValid())
return false;
assert(A.isStringAttribute() && "Expected a string attribute!");
@@ -33,7 +32,7 @@ bool hasAssumption(const Attribute &A,
return llvm::is_contained(Strings, AssumptionStr);
}
-DenseSet<StringRef> getAssumptions(const Attribute &A) {
+static DenseSet<StringRef> getAssumptions(const Attribute &A) {
if (!A.isValid())
return DenseSet<StringRef>();
assert(A.isStringAttribute() && "Expected a string attribute!");
@@ -47,8 +46,8 @@ DenseSet<StringRef> getAssumptions(const Attribute &A) {
}
template <typename AttrSite>
-bool addAssumptionsImpl(AttrSite &Site,
- const DenseSet<StringRef> &Assumptions) {
+static bool addAssumptionsImpl(AttrSite &Site,
+ const DenseSet<StringRef> &Assumptions) {
if (Assumptions.empty())
return false;
@@ -64,7 +63,6 @@ bool addAssumptionsImpl(AttrSite &Site,
return true;
}
-} // namespace
bool llvm::hasAssumption(const Function &F,
const KnownAssumptionString &AssumptionStr) {
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 1ededb9e7..77d044b 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1768,6 +1768,7 @@ bool DIExpression::isValid() const {
case dwarf::DW_OP_bregx:
case dwarf::DW_OP_push_object_address:
case dwarf::DW_OP_over:
+ case dwarf::DW_OP_rot:
case dwarf::DW_OP_consts:
case dwarf::DW_OP_eq:
case dwarf::DW_OP_ne:
@@ -1775,6 +1776,8 @@ bool DIExpression::isValid() const {
case dwarf::DW_OP_ge:
case dwarf::DW_OP_lt:
case dwarf::DW_OP_le:
+ case dwarf::DW_OP_neg:
+ case dwarf::DW_OP_abs:
break;
}
}
diff --git a/llvm/lib/IR/DiagnosticHandler.cpp b/llvm/lib/IR/DiagnosticHandler.cpp
index 683eade..eb2fe3b 100644
--- a/llvm/lib/IR/DiagnosticHandler.cpp
+++ b/llvm/lib/IR/DiagnosticHandler.cpp
@@ -36,6 +36,7 @@ struct PassRemarksOpt {
}
}
};
+} // namespace
static PassRemarksOpt PassRemarksPassedOptLoc;
static PassRemarksOpt PassRemarksMissedOptLoc;
@@ -66,7 +67,6 @@ static cl::opt<PassRemarksOpt, true, cl::parser<std::string>>
"Enable optimization analysis remarks from passes whose name match "
"the given regular expression"),
cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired);
-}
bool DiagnosticHandler::isAnalysisRemarkEnabled(StringRef PassName) const {
return (PassRemarksAnalysisOptLoc.Pattern &&
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index d9024b0..dc55b63 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -409,7 +409,7 @@ struct Edge {
GlobalValue::GUID Src;
GlobalValue::GUID Dst;
};
-}
+} // namespace
void Attributes::add(const Twine &Name, const Twine &Value,
const Twine &Comment) {
diff --git a/llvm/lib/IR/PassInstrumentation.cpp b/llvm/lib/IR/PassInstrumentation.cpp
index 70bbe8f..52aad8f 100644
--- a/llvm/lib/IR/PassInstrumentation.cpp
+++ b/llvm/lib/IR/PassInstrumentation.cpp
@@ -15,7 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/PassManager.h"
-namespace llvm {
+using namespace llvm;
template struct LLVM_EXPORT_TEMPLATE Any::TypeId<const Module *>;
template struct LLVM_EXPORT_TEMPLATE Any::TypeId<const Function *>;
@@ -42,7 +42,8 @@ PassInstrumentationCallbacks::getPassNameForClassName(StringRef ClassName) {
AnalysisKey PassInstrumentationAnalysis::Key;
-bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) {
+bool llvm::isSpecialPass(StringRef PassID,
+ const std::vector<StringRef> &Specials) {
size_t Pos = PassID.find('<');
StringRef Prefix = PassID;
if (Pos != StringRef::npos)
@@ -50,5 +51,3 @@ bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) {
return any_of(Specials,
[Prefix](StringRef S) { return Prefix.ends_with(S); });
}
-
-} // namespace llvm
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index edeca97..fc2be51 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -24,8 +24,6 @@
using namespace llvm;
-namespace {
-
// MD_prof nodes have the following layout
//
// In general:
@@ -41,14 +39,15 @@ namespace {
// correctly, and can change the behavior in the future if the layout changes
// the minimum number of operands for MD_prof nodes with branch weights
-constexpr unsigned MinBWOps = 3;
+static constexpr unsigned MinBWOps = 3;
// the minimum number of operands for MD_prof nodes with value profiles
-constexpr unsigned MinVPOps = 5;
+static constexpr unsigned MinVPOps = 5;
// We may want to add support for other MD_prof types, so provide an abstraction
// for checking the metadata type.
-bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) {
+static bool isTargetMD(const MDNode *ProfData, const char *Name,
+ unsigned MinOps) {
// TODO: This routine may be simplified if MD_prof used an enum instead of a
// string to differentiate the types of MD_prof nodes.
if (!ProfData || !Name || MinOps < 2)
@@ -101,14 +100,11 @@ static SmallVector<uint32_t> fitWeights(ArrayRef<uint64_t> Weights) {
return Ret;
}
-} // namespace
-
-namespace llvm {
-cl::opt<bool> ElideAllZeroBranchWeights("elide-all-zero-branch-weights",
+static cl::opt<bool> ElideAllZeroBranchWeights("elide-all-zero-branch-weights",
#if defined(LLVM_ENABLE_PROFCHECK)
- cl::init(false)
+ cl::init(false)
#else
- cl::init(true)
+ cl::init(true)
#endif
);
const char *MDProfLabels::BranchWeights = "branch_weights";
@@ -118,21 +114,21 @@ const char *MDProfLabels::FunctionEntryCount = "function_entry_count";
const char *MDProfLabels::SyntheticFunctionEntryCount =
"synthetic_function_entry_count";
const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown";
-const char *LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count";
+const char *llvm::LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count";
-bool hasProfMD(const Instruction &I) {
+bool llvm::hasProfMD(const Instruction &I) {
return I.hasMetadata(LLVMContext::MD_prof);
}
-bool isBranchWeightMD(const MDNode *ProfileData) {
+bool llvm::isBranchWeightMD(const MDNode *ProfileData) {
return isTargetMD(ProfileData, MDProfLabels::BranchWeights, MinBWOps);
}
-bool isValueProfileMD(const MDNode *ProfileData) {
+bool llvm::isValueProfileMD(const MDNode *ProfileData) {
return isTargetMD(ProfileData, MDProfLabels::ValueProfile, MinVPOps);
}
-bool hasBranchWeightMD(const Instruction &I) {
+bool llvm::hasBranchWeightMD(const Instruction &I) {
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
return isBranchWeightMD(ProfileData);
}
@@ -147,16 +143,16 @@ static bool hasCountTypeMD(const Instruction &I) {
return isa<CallBase>(I) && !isBranchWeightMD(ProfileData);
}
-bool hasValidBranchWeightMD(const Instruction &I) {
+bool llvm::hasValidBranchWeightMD(const Instruction &I) {
return getValidBranchWeightMDNode(I);
}
-bool hasBranchWeightOrigin(const Instruction &I) {
+bool llvm::hasBranchWeightOrigin(const Instruction &I) {
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
return hasBranchWeightOrigin(ProfileData);
}
-bool hasBranchWeightOrigin(const MDNode *ProfileData) {
+bool llvm::hasBranchWeightOrigin(const MDNode *ProfileData) {
if (!isBranchWeightMD(ProfileData))
return false;
auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(1));
@@ -168,54 +164,54 @@ bool hasBranchWeightOrigin(const MDNode *ProfileData) {
return ProfDataName != nullptr;
}
-unsigned getBranchWeightOffset(const MDNode *ProfileData) {
+unsigned llvm::getBranchWeightOffset(const MDNode *ProfileData) {
return hasBranchWeightOrigin(ProfileData) ? 2 : 1;
}
-unsigned getNumBranchWeights(const MDNode &ProfileData) {
+unsigned llvm::getNumBranchWeights(const MDNode &ProfileData) {
return ProfileData.getNumOperands() - getBranchWeightOffset(&ProfileData);
}
-MDNode *getBranchWeightMDNode(const Instruction &I) {
+MDNode *llvm::getBranchWeightMDNode(const Instruction &I) {
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
if (!isBranchWeightMD(ProfileData))
return nullptr;
return ProfileData;
}
-MDNode *getValidBranchWeightMDNode(const Instruction &I) {
+MDNode *llvm::getValidBranchWeightMDNode(const Instruction &I) {
auto *ProfileData = getBranchWeightMDNode(I);
if (ProfileData && getNumBranchWeights(*ProfileData) == I.getNumSuccessors())
return ProfileData;
return nullptr;
}
-void extractFromBranchWeightMD32(const MDNode *ProfileData,
- SmallVectorImpl<uint32_t> &Weights) {
+void llvm::extractFromBranchWeightMD32(const MDNode *ProfileData,
+ SmallVectorImpl<uint32_t> &Weights) {
extractFromBranchWeightMD(ProfileData, Weights);
}
-void extractFromBranchWeightMD64(const MDNode *ProfileData,
- SmallVectorImpl<uint64_t> &Weights) {
+void llvm::extractFromBranchWeightMD64(const MDNode *ProfileData,
+ SmallVectorImpl<uint64_t> &Weights) {
extractFromBranchWeightMD(ProfileData, Weights);
}
-bool extractBranchWeights(const MDNode *ProfileData,
- SmallVectorImpl<uint32_t> &Weights) {
+bool llvm::extractBranchWeights(const MDNode *ProfileData,
+ SmallVectorImpl<uint32_t> &Weights) {
if (!isBranchWeightMD(ProfileData))
return false;
extractFromBranchWeightMD(ProfileData, Weights);
return true;
}
-bool extractBranchWeights(const Instruction &I,
- SmallVectorImpl<uint32_t> &Weights) {
+bool llvm::extractBranchWeights(const Instruction &I,
+ SmallVectorImpl<uint32_t> &Weights) {
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
return extractBranchWeights(ProfileData, Weights);
}
-bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal,
- uint64_t &FalseVal) {
+bool llvm::extractBranchWeights(const Instruction &I, uint64_t &TrueVal,
+ uint64_t &FalseVal) {
assert((I.getOpcode() == Instruction::Br ||
I.getOpcode() == Instruction::Select) &&
"Looking for branch weights on something besides branch, select, or "
@@ -234,7 +230,8 @@ bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal,
return true;
}
-bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) {
+bool llvm::extractProfTotalWeight(const MDNode *ProfileData,
+ uint64_t &TotalVal) {
TotalVal = 0;
if (!ProfileData)
return false;
@@ -262,11 +259,12 @@ bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) {
return false;
}
-bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) {
+bool llvm::extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) {
return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal);
}
-void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) {
+void llvm::setExplicitlyUnknownBranchWeights(Instruction &I,
+ StringRef PassName) {
MDBuilder MDB(I.getContext());
I.setMetadata(
LLVMContext::MD_prof,
@@ -275,14 +273,16 @@ void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) {
MDB.createString(PassName)}));
}
-void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F,
- StringRef PassName) {
+void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I,
+ Function &F,
+ StringRef PassName) {
if (std::optional<Function::ProfileCount> EC = F.getEntryCount();
EC && EC->getCount() > 0)
setExplicitlyUnknownBranchWeights(I, PassName);
}
-void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) {
+void llvm::setExplicitlyUnknownFunctionEntryCount(Function &F,
+ StringRef PassName) {
MDBuilder MDB(F.getContext());
F.setMetadata(
LLVMContext::MD_prof,
@@ -291,21 +291,21 @@ void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) {
MDB.createString(PassName)}));
}
-bool isExplicitlyUnknownProfileMetadata(const MDNode &MD) {
+bool llvm::isExplicitlyUnknownProfileMetadata(const MDNode &MD) {
if (MD.getNumOperands() != 2)
return false;
return MD.getOperand(0).equalsStr(MDProfLabels::UnknownBranchWeightsMarker);
}
-bool hasExplicitlyUnknownBranchWeights(const Instruction &I) {
+bool llvm::hasExplicitlyUnknownBranchWeights(const Instruction &I) {
auto *MD = I.getMetadata(LLVMContext::MD_prof);
if (!MD)
return false;
return isExplicitlyUnknownProfileMetadata(*MD);
}
-void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
- bool IsExpected, bool ElideAllZero) {
+void llvm::setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
+ bool IsExpected, bool ElideAllZero) {
if ((ElideAllZeroBranchWeights && ElideAllZero) &&
llvm::all_of(Weights, [](uint32_t V) { return V == 0; })) {
I.setMetadata(LLVMContext::MD_prof, nullptr);
@@ -317,13 +317,14 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
I.setMetadata(LLVMContext::MD_prof, BranchWeights);
}
-void setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights,
- bool IsExpected, bool ElideAllZero) {
+void llvm::setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights,
+ bool IsExpected, bool ElideAllZero) {
setBranchWeights(I, fitWeights(Weights), IsExpected, ElideAllZero);
}
-SmallVector<uint32_t> downscaleWeights(ArrayRef<uint64_t> Weights,
- std::optional<uint64_t> KnownMaxCount) {
+SmallVector<uint32_t>
+llvm::downscaleWeights(ArrayRef<uint64_t> Weights,
+ std::optional<uint64_t> KnownMaxCount) {
uint64_t MaxCount = KnownMaxCount.has_value() ? KnownMaxCount.value()
: *llvm::max_element(Weights);
assert(MaxCount > 0 && "Bad max count");
@@ -334,7 +335,7 @@ SmallVector<uint32_t> downscaleWeights(ArrayRef<uint64_t> Weights,
return DownscaledWeights;
}
-void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
+void llvm::scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
assert(T != 0 && "Caller should guarantee");
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
if (ProfileData == nullptr)
@@ -387,5 +388,3 @@ void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
}
I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals));
}
-
-} // namespace llvm
diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp
index e54894c..e35b5b3 100644
--- a/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -196,7 +196,6 @@ protected:
static void Verify(const Function &F, const DominatorTree &DT,
const CFGDeadness &CD);
-namespace llvm {
PreservedAnalyses SafepointIRVerifierPass::run(Function &F,
FunctionAnalysisManager &AM) {
const auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
@@ -205,7 +204,6 @@ PreservedAnalyses SafepointIRVerifierPass::run(Function &F,
Verify(F, DT, CD);
return PreservedAnalyses::all();
}
-} // namespace llvm
namespace {
diff --git a/llvm/lib/IR/VFABIDemangler.cpp b/llvm/lib/IR/VFABIDemangler.cpp
index 2de05a5..4fcf436 100644
--- a/llvm/lib/IR/VFABIDemangler.cpp
+++ b/llvm/lib/IR/VFABIDemangler.cpp
@@ -20,15 +20,16 @@ using namespace llvm;
#define DEBUG_TYPE "vfabi-demangler"
-namespace {
/// Utilities for the Vector Function ABI name parser.
+namespace {
/// Return types for the parser functions.
enum class ParseRet {
OK, // Found.
None, // Not found.
Error // Syntax error.
};
+} // namespace
/// Extracts the `<isa>` information from the mangled string, and
/// sets the `ISA` accordingly. If successful, the <isa> token is removed
@@ -372,7 +373,6 @@ getScalableECFromSignature(const FunctionType *Signature, const VFISAKind ISA,
return std::nullopt;
}
-} // namespace
// Format of the ABI name:
// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index a347609..b775cbb 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -622,6 +622,7 @@ enum PointerStripKind {
PSK_InBoundsConstantIndices,
PSK_InBounds
};
+} // end anonymous namespace
template <PointerStripKind StripKind> static void NoopCallback(const Value *) {}
@@ -696,7 +697,6 @@ static const Value *stripPointerCastsAndOffsets(
return V;
}
-} // end anonymous namespace
const Value *Value::stripPointerCasts() const {
return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 7da972f..42b21b5 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -207,6 +207,7 @@ add_llvm_component_library(LLVMSupport
InstructionCost.cpp
IntEqClasses.cpp
IntervalMap.cpp
+ Jobserver.cpp
JSON.cpp
KnownBits.cpp
KnownFPClass.cpp
diff --git a/llvm/lib/Support/Jobserver.cpp b/llvm/lib/Support/Jobserver.cpp
new file mode 100644
index 0000000..9f726eb
--- /dev/null
+++ b/llvm/lib/Support/Jobserver.cpp
@@ -0,0 +1,259 @@
+//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Jobserver.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <new>
+
+#define DEBUG_TYPE "jobserver"
+
+using namespace llvm;
+
+namespace {
+struct FdPair {
+ int Read = -1;
+ int Write = -1;
+ bool isValid() const { return Read >= 0 && Write >= 0; }
+};
+
+struct JobserverConfig {
+ enum Mode {
+ None,
+ PosixFifo,
+ PosixPipe,
+ Win32Semaphore,
+ };
+ Mode TheMode = None;
+ std::string Path;
+ FdPair PipeFDs;
+};
+
+/// A helper function that checks if `Input` starts with `Prefix`.
+/// If it does, it removes the prefix from `Input`, assigns the remainder to
+/// `Value`, and returns true. Otherwise, it returns false.
+bool getPrefixedValue(StringRef Input, StringRef Prefix, StringRef &Value) {
+ if (Input.consume_front(Prefix)) {
+ Value = Input;
+ return true;
+ }
+ return false;
+}
+
+/// A helper function to parse a string in the format "R,W" where R and W are
+/// non-negative integers representing file descriptors. Returns the parsed
+/// `FdPair` on success, or `std::nullopt` on failure.
+static std::optional<FdPair> getFileDescriptorPair(StringRef Input) {
+ FdPair FDs;
+ if (Input.consumeInteger(10, FDs.Read))
+ return std::nullopt;
+ if (!Input.consume_front(","))
+ return std::nullopt;
+ if (Input.consumeInteger(10, FDs.Write))
+ return std::nullopt;
+ if (!Input.empty() || !FDs.isValid())
+ return std::nullopt;
+ return FDs;
+}
+
+/// Parses the `MAKEFLAGS` environment variable string to find jobserver
+/// arguments. It splits the string into space-separated arguments and searches
+/// for `--jobserver-auth` or `--jobserver-fds`. Based on the value of these
+/// arguments, it determines the jobserver mode (Pipe, FIFO, or Semaphore) and
+/// connection details (file descriptors or path).
+Expected<JobserverConfig> parseNativeMakeFlags(StringRef MakeFlags) {
+ JobserverConfig Config;
+ if (MakeFlags.empty())
+ return Config;
+
+ // Split the MAKEFLAGS string into arguments.
+ SmallVector<StringRef, 8> Args;
+ SplitString(MakeFlags, Args);
+
+ // If '-n' (dry-run) is present as a legacy flag (not starting with '-'),
+ // disable the jobserver.
+ if (!Args.empty() && !Args[0].starts_with("-") && Args[0].contains('n'))
+ return Config;
+
+ // Iterate through arguments to find jobserver flags.
+ // Note that make may pass multiple --jobserver-auth flags; the last one wins.
+ for (StringRef Arg : Args) {
+ StringRef Value;
+ if (getPrefixedValue(Arg, "--jobserver-auth=", Value)) {
+ // Try to parse as a file descriptor pair first.
+ if (auto FDPair = getFileDescriptorPair(Value)) {
+ Config.TheMode = JobserverConfig::PosixPipe;
+ Config.PipeFDs = *FDPair;
+ } else {
+ StringRef FifoPath;
+ // If not FDs, try to parse as a named pipe (fifo).
+ if (getPrefixedValue(Value, "fifo:", FifoPath)) {
+ Config.TheMode = JobserverConfig::PosixFifo;
+ Config.Path = FifoPath.str();
+ } else {
+ // Otherwise, assume it's a Windows semaphore.
+ Config.TheMode = JobserverConfig::Win32Semaphore;
+ Config.Path = Value.str();
+ }
+ }
+ } else if (getPrefixedValue(Arg, "--jobserver-fds=", Value)) {
+ // This is an alternative, older syntax for the pipe-based server.
+ if (auto FDPair = getFileDescriptorPair(Value)) {
+ Config.TheMode = JobserverConfig::PosixPipe;
+ Config.PipeFDs = *FDPair;
+ } else {
+ return createStringError(inconvertibleErrorCode(),
+ "Invalid file descriptor pair in MAKEFLAGS");
+ }
+ }
+ }
+
+// Perform platform-specific validation.
+#ifdef _WIN32
+ if (Config.TheMode == JobserverConfig::PosixFifo ||
+ Config.TheMode == JobserverConfig::PosixPipe)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "FIFO/Pipe-based jobserver is not supported on Windows");
+#else
+ if (Config.TheMode == JobserverConfig::Win32Semaphore)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "Semaphore-based jobserver is not supported on this platform");
+#endif
+ return Config;
+}
+
+std::once_flag GJobserverOnceFlag;
+JobserverClient *GJobserver = nullptr;
+
+} // namespace
+
+namespace llvm {
+class JobserverClientImpl : public JobserverClient {
+ bool IsInitialized = false;
+ std::atomic<bool> HasImplicitSlot{true};
+ unsigned NumJobs = 0;
+
+public:
+ JobserverClientImpl(const JobserverConfig &Config);
+ ~JobserverClientImpl() override;
+
+ JobSlot tryAcquire() override;
+ void release(JobSlot Slot) override;
+ unsigned getNumJobs() const override { return NumJobs; }
+
+ bool isValid() const { return IsInitialized; }
+
+private:
+#if defined(LLVM_ON_UNIX)
+ int ReadFD = -1;
+ int WriteFD = -1;
+ std::string FifoPath;
+#elif defined(_WIN32)
+ void *Semaphore = nullptr;
+#endif
+};
+} // namespace llvm
+
+// Include the platform-specific parts of the class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/Jobserver.inc"
+#elif defined(_WIN32)
+#include "Windows/Jobserver.inc"
+#else
+// Dummy implementation for unsupported platforms.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {}
+JobserverClientImpl::~JobserverClientImpl() = default;
+JobSlot JobserverClientImpl::tryAcquire() { return JobSlot(); }
+void JobserverClientImpl::release(JobSlot Slot) {}
+#endif
+
+namespace llvm {
+JobserverClient::~JobserverClient() = default;
+
+uint8_t JobSlot::getExplicitValue() const {
+ assert(isExplicit() && "Cannot get value of implicit or invalid slot");
+ return static_cast<uint8_t>(Value);
+}
+
+/// This is the main entry point for acquiring a jobserver client. It uses a
+/// std::call_once to ensure the singleton `GJobserver` instance is created
+/// safely in a multi-threaded environment. On first call, it reads the
+/// `MAKEFLAGS` environment variable, parses it, and attempts to construct and
+/// initialize a `JobserverClientImpl`. If successful, the global instance is
+/// stored in `GJobserver`. Subsequent calls will return the existing instance.
+JobserverClient *JobserverClient::getInstance() {
+ std::call_once(GJobserverOnceFlag, []() {
+ LLVM_DEBUG(
+ dbgs()
+ << "JobserverClient::getInstance() called for the first time.\n");
+ const char *MakeFlagsEnv = getenv("MAKEFLAGS");
+ if (!MakeFlagsEnv) {
+ errs() << "Warning: failed to create jobserver client due to MAKEFLAGS "
+ "environment variable not found\n";
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found MAKEFLAGS = \"" << MakeFlagsEnv << "\"\n");
+
+ auto ConfigOrErr = parseNativeMakeFlags(MakeFlagsEnv);
+ if (Error Err = ConfigOrErr.takeError()) {
+ errs() << "Warning: failed to create jobserver client due to invalid "
+ "MAKEFLAGS environment variable: "
+ << toString(std::move(Err)) << "\n";
+ return;
+ }
+
+ JobserverConfig Config = *ConfigOrErr;
+ if (Config.TheMode == JobserverConfig::None) {
+ errs() << "Warning: failed to create jobserver client due to jobserver "
+ "mode missing in MAKEFLAGS environment variable\n";
+ return;
+ }
+
+ if (Config.TheMode == JobserverConfig::PosixPipe) {
+#if defined(LLVM_ON_UNIX)
+ if (!areFdsValid(Config.PipeFDs.Read, Config.PipeFDs.Write)) {
+ errs() << "Warning: failed to create jobserver client due to invalid "
+ "Pipe FDs in MAKEFLAGS environment variable\n";
+ return;
+ }
+#endif
+ }
+
+ auto Client = std::make_unique<JobserverClientImpl>(Config);
+ if (Client->isValid()) {
+ LLVM_DEBUG(dbgs() << "Jobserver client created successfully!\n");
+ GJobserver = Client.release();
+ } else
+ errs() << "Warning: jobserver client initialization failed.\n";
+ });
+ return GJobserver;
+}
+
+/// For testing purposes only. This function resets the singleton instance by
+/// destroying the existing client and re-initializing the `std::once_flag`.
+/// This allows tests to simulate the first-time initialization of the
+/// jobserver client multiple times.
+void JobserverClient::resetForTesting() {
+ delete GJobserver;
+ GJobserver = nullptr;
+ // Re-construct the std::once_flag in place to reset the singleton state.
+ new (&GJobserverOnceFlag) std::once_flag();
+}
+} // namespace llvm
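For reference, the strings parseNativeMakeFlags() accepts mirror what GNU make places in MAKEFLAGS, and clients go through the singleton shown above. A minimal, hedged usage sketch (the MAKEFLAGS values are illustrative instances of the formats handled above; the FIFO path and the surrounding function are made up, and error handling is elided):

// Typical MAKEFLAGS contents this parser understands:
//   "-j4 --jobserver-auth=fifo:/tmp/GMfifo1234"   -> PosixFifo
//   "-j4 --jobserver-auth=3,4"                    -> PosixPipe (FDs 3 and 4)
//   "-j4 --jobserver-fds=3,4"                     -> PosixPipe (older spelling)
//
// Minimal client usage, mirroring how the thread pool code in this patch
// consumes the API:
#include "llvm/Support/Jobserver.h"
using namespace llvm;

void runOneGuardedTask() {
  JobserverClient *Client = JobserverClient::getInstance();
  if (!Client)
    return; // No jobserver advertised in MAKEFLAGS; fall back to other limits.
  JobSlot Slot = Client->tryAcquire();
  if (!Slot.isValid())
    return; // All job slots are currently in use.
  // ... do a bounded unit of work while holding the slot ...
  Client->release(std::move(Slot));
}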
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 3ac6fc7..8e0c724 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -7,12 +7,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Parallel.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/ExponentialBackoff.h"
+#include "llvm/Support/Jobserver.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <future>
+#include <memory>
+#include <mutex>
#include <thread>
#include <vector>
@@ -49,6 +54,9 @@ public:
class ThreadPoolExecutor : public Executor {
public:
explicit ThreadPoolExecutor(ThreadPoolStrategy S) {
+ if (S.UseJobserver)
+ TheJobserver = JobserverClient::getInstance();
+
ThreadCount = S.compute_thread_count();
// Spawn all but one of the threads in another thread as spawning threads
// can take a while.
@@ -69,6 +77,10 @@ public:
});
}
+ // To make sure the thread pool executor can only be created with a parallel
+ // strategy.
+ ThreadPoolExecutor() = delete;
+
void stop() {
{
std::lock_guard<std::mutex> Lock(Mutex);
@@ -111,15 +123,62 @@ private:
void work(ThreadPoolStrategy S, unsigned ThreadID) {
threadIndex = ThreadID;
S.apply_thread_strategy(ThreadID);
+ // Note on jobserver deadlock avoidance:
+ // GNU Make grants each invoked process one implicit job slot. Our
+ // JobserverClient models this by returning an implicit JobSlot on the
+ // first successful tryAcquire() in a process. This guarantees forward
+ // progress without requiring a dedicated "always-on" thread here.
+
+ static thread_local std::unique_ptr<ExponentialBackoff> Backoff;
+
while (true) {
- std::unique_lock<std::mutex> Lock(Mutex);
- Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
- if (Stop)
- break;
- auto Task = std::move(WorkStack.back());
- WorkStack.pop_back();
- Lock.unlock();
- Task();
+ if (TheJobserver) {
+ // Jobserver-mode scheduling:
+ // - Acquire one job slot (with exponential backoff to avoid busy-wait).
+ // - While holding the slot, drain and run tasks from the local queue.
+ // - Release the slot when the queue is empty or when shutting down.
+ // Rationale: Holding a slot amortizes acquire/release overhead over
+ // multiple tasks and avoids requeue/yield churn, while still enforcing
+      // the jobserver's global concurrency limit. With K available slots,
+ // up to K workers run tasks in parallel; within each worker tasks run
+ // sequentially until the local queue is empty.
+ ExponentialBackoff Backoff(std::chrono::hours(24));
+ JobSlot Slot;
+ do {
+ if (Stop)
+ return;
+ Slot = TheJobserver->tryAcquire();
+ if (Slot.isValid())
+ break;
+ } while (Backoff.waitForNextAttempt());
+
+ auto SlotReleaser = llvm::make_scope_exit(
+ [&] { TheJobserver->release(std::move(Slot)); });
+
+ while (true) {
+ std::function<void()> Task;
+ {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
+ if (Stop && WorkStack.empty())
+ return;
+ if (WorkStack.empty())
+ break;
+ Task = std::move(WorkStack.back());
+ WorkStack.pop_back();
+ }
+ Task();
+ }
+ } else {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
+ if (Stop)
+ break;
+ auto Task = std::move(WorkStack.back());
+ WorkStack.pop_back();
+ Lock.unlock();
+ Task();
+ }
}
}
@@ -130,9 +189,20 @@ private:
std::promise<void> ThreadsCreated;
std::vector<std::thread> Threads;
unsigned ThreadCount;
+
+ JobserverClient *TheJobserver = nullptr;
};
-Executor *Executor::getDefaultExecutor() {
+// A global raw pointer to the executor. Lifetime is managed by the
+// objects created within createExecutor().
+static Executor *TheExec = nullptr;
+static std::once_flag Flag;
+
+// This function will be called exactly once to create the executor.
+// It contains the necessary platform-specific logic. Since functions
+// called by std::call_once cannot return a value, we have to set the
+// executor as a global variable.
+void createExecutor() {
#ifdef _WIN32
// The ManagedStatic enables the ThreadPoolExecutor to be stopped via
// llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This
@@ -156,16 +226,22 @@ Executor *Executor::getDefaultExecutor() {
ThreadPoolExecutor::Deleter>
ManagedExec;
static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec));
- return Exec.get();
+ TheExec = Exec.get();
#else
// ManagedStatic is not desired on other platforms. When `Exec` is destroyed
// by llvm_shutdown(), worker threads will clean up and invoke TLS
// destructors. This can lead to race conditions if other threads attempt to
// access TLS objects that have already been destroyed.
static ThreadPoolExecutor Exec(strategy);
- return &Exec;
+ TheExec = &Exec;
#endif
}
+
+Executor *Executor::getDefaultExecutor() {
+ // Use std::call_once to lazily and safely initialize the executor.
+ std::call_once(Flag, createExecutor);
+ return TheExec;
+}
} // namespace
} // namespace detail
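The jobserver branch of work() above follows a hold-one-slot, drain-the-queue discipline. Stripped of the executor plumbing, the pattern looks like the sketch below; ProcessNextTask stands in for the WorkStack/condition-variable handling and is this example's assumption, not part of the patch.

#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/ExponentialBackoff.h"
#include "llvm/Support/Jobserver.h"
#include <chrono>
using namespace llvm;

// Acquire one slot (backing off instead of busy-waiting), run queued tasks
// while holding it, and release it even on early exit.
static void runWhileHoldingSlot(JobserverClient &JS,
                                function_ref<bool()> ProcessNextTask) {
  ExponentialBackoff Backoff(std::chrono::hours(24));
  JobSlot Slot;
  do {
    Slot = JS.tryAcquire();
    if (Slot.isValid())
      break;
  } while (Backoff.waitForNextAttempt());
  if (!Slot.isValid())
    return;
  auto Releaser = make_scope_exit([&] { JS.release(std::move(Slot)); });
  // Holding the slot for the whole batch amortizes acquire/release cost.
  while (ProcessNextTask())
    ;
}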
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index c304f0f..6960268 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
+//
// This file implements a crude C++11 based thread pool.
//
//===----------------------------------------------------------------------===//
@@ -14,6 +15,8 @@
#include "llvm/Config/llvm-config.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/ExponentialBackoff.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,7 +36,10 @@ ThreadPoolInterface::~ThreadPoolInterface() = default;
#if LLVM_ENABLE_THREADS
StdThreadPool::StdThreadPool(ThreadPoolStrategy S)
- : Strategy(S), MaxThreadCount(S.compute_thread_count()) {}
+ : Strategy(S), MaxThreadCount(S.compute_thread_count()) {
+ if (Strategy.UseJobserver)
+ TheJobserver = JobserverClient::getInstance();
+}
void StdThreadPool::grow(int requested) {
llvm::sys::ScopedWriter LockGuard(ThreadsLock);
@@ -45,7 +51,15 @@ void StdThreadPool::grow(int requested) {
Threads.emplace_back([this, ThreadID] {
set_thread_name(formatv("llvm-worker-{0}", ThreadID));
Strategy.apply_thread_strategy(ThreadID);
- processTasks(nullptr);
+ // Note on jobserver deadlock avoidance:
+ // GNU Make grants each invoked process one implicit job slot.
+ // JobserverClient::tryAcquire() returns that implicit slot on the first
+ // successful call in a process, ensuring forward progress without a
+ // dedicated "always-on" thread.
+ if (TheJobserver)
+ processTasksWithJobserver();
+ else
+ processTasks(nullptr);
});
}
}
@@ -133,6 +147,96 @@ void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
}
}
+/// Main loop for worker threads when using a jobserver.
+/// This function uses a two-level queue; it first acquires a job slot from the
+/// external jobserver, then retrieves a task from the internal queue.
+/// This allows the thread pool to cooperate with build systems like `make -j`.
+void StdThreadPool::processTasksWithJobserver() {
+ while (true) {
+ // Acquire a job slot from the external jobserver.
+ // This polls for a slot and yields the thread to avoid a high-CPU wait.
+ JobSlot Slot;
+ // The timeout for the backoff can be very long, as the shutdown
+ // is checked on each iteration. The sleep duration is capped by MaxWait
+ // in ExponentialBackoff, so shutdown latency is not a problem.
+ ExponentialBackoff Backoff(std::chrono::hours(24));
+ bool AcquiredToken = false;
+ do {
+ // Return if the thread pool is shutting down.
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+ if (!EnableFlag)
+ return;
+ }
+
+ Slot = TheJobserver->tryAcquire();
+ if (Slot.isValid()) {
+ AcquiredToken = true;
+ break;
+ }
+ } while (Backoff.waitForNextAttempt());
+
+ if (!AcquiredToken) {
+ // This is practically unreachable with a 24h timeout and indicates a
+ // deeper problem if hit.
+ report_fatal_error("Timed out waiting for jobserver token.");
+ }
+
+ // `make_scope_exit` guarantees the job slot is released, even if the
+ // task throws or we exit early. This prevents deadlocking the build.
+ auto SlotReleaser =
+ make_scope_exit([&] { TheJobserver->release(std::move(Slot)); });
+
+ // While we hold a job slot, process tasks from the internal queue.
+ while (true) {
+ std::function<void()> Task;
+ ThreadPoolTaskGroup *GroupOfTask = nullptr;
+
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+ // Wait until a task is available or the pool is shutting down.
+ QueueCondition.wait(LockGuard,
+ [&] { return !EnableFlag || !Tasks.empty(); });
+
+ // If shutting down and the queue is empty, the thread can terminate.
+ if (!EnableFlag && Tasks.empty())
+ return;
+
+ // If the queue is empty, we're done processing tasks for now.
+ // Break the inner loop to release the job slot.
+ if (Tasks.empty())
+ break;
+
+ // A task is available. Mark it as active before releasing the lock
+ // to prevent race conditions with `wait()`.
+ ++ActiveThreads;
+ Task = std::move(Tasks.front().first);
+ GroupOfTask = Tasks.front().second;
+ if (GroupOfTask != nullptr)
+ ++ActiveGroups[GroupOfTask];
+ Tasks.pop_front();
+ } // The queue lock is released.
+
+ // Run the task. The job slot remains acquired during execution.
+ Task();
+
+ // The task has finished. Update the active count and notify any waiters.
+ {
+ std::lock_guard<std::mutex> LockGuard(QueueLock);
+ --ActiveThreads;
+ if (GroupOfTask != nullptr) {
+ auto A = ActiveGroups.find(GroupOfTask);
+ if (--(A->second) == 0)
+ ActiveGroups.erase(A);
+ }
+ // If all tasks are complete, notify any waiting threads.
+ if (workCompletedUnlocked(nullptr))
+ CompletionCondition.notify_all();
+ }
+ }
+ }
+}
bool StdThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const {
if (Group == nullptr)
return !ActiveThreads && Tasks.empty();
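From a caller's perspective only the strategy changes: a pool built with UseJobserver set routes its workers through processTasksWithJobserver() above. A hedged usage sketch, assuming the new UseJobserver member of ThreadPoolStrategy is settable by clients like the existing public fields; the queued tasks are placeholders:

#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
using namespace llvm;

static void buildJobserverAwarePool() {
  // Start from the usual strategy and opt into jobserver coordination; the
  // worker count then comes from JobserverClient::getNumJobs() via
  // compute_thread_count() (see the Threading.cpp hunk below).
  ThreadPoolStrategy S = hardware_concurrency();
  S.UseJobserver = true;
  StdThreadPool Pool(S);
  for (int I = 0; I != 8; ++I)
    Pool.async([] { /* placeholder unit of work */ });
  Pool.wait();
}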
diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp
index 693de0e..9da357a 100644
--- a/llvm/lib/Support/Threading.cpp
+++ b/llvm/lib/Support/Threading.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Threading.h"
#include "llvm/Config/config.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Jobserver.h"
#include <cassert>
#include <optional>
@@ -51,6 +52,10 @@ int llvm::get_physical_cores() { return -1; }
static int computeHostNumHardwareThreads();
unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
+ if (UseJobserver)
+ if (auto JS = JobserverClient::getInstance())
+ return JS->getNumJobs();
+
int MaxThreadCount =
UseHyperThreads ? computeHostNumHardwareThreads() : get_physical_cores();
if (MaxThreadCount <= 0)
diff --git a/llvm/lib/Support/Unix/Jobserver.inc b/llvm/lib/Support/Unix/Jobserver.inc
new file mode 100644
index 0000000..53bf7f2
--- /dev/null
+++ b/llvm/lib/Support/Unix/Jobserver.inc
@@ -0,0 +1,195 @@
+//===- llvm/Support/Unix/Jobserver.inc - Unix Jobserver Impl ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX-specific parts of the JobserverClient class.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <cassert>
+#include <cerrno>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace {
+/// Returns true if the given file descriptor is a FIFO (named pipe).
+bool isFifo(int FD) {
+ struct stat StatBuf;
+ if (::fstat(FD, &StatBuf) != 0)
+ return false;
+ return S_ISFIFO(StatBuf.st_mode);
+}
+
+/// Returns true if the given file descriptors are valid.
+bool areFdsValid(int ReadFD, int WriteFD) {
+ if (ReadFD == -1 || WriteFD == -1)
+ return false;
+ // Check if the file descriptors are actually valid by checking their flags.
+ return ::fcntl(ReadFD, F_GETFD) != -1 && ::fcntl(WriteFD, F_GETFD) != -1;
+}
+} // namespace
+
+/// The constructor sets up the client based on the provided configuration.
+/// For pipe-based jobservers, it duplicates the inherited file descriptors,
+/// sets them to close-on-exec, and makes the read descriptor non-blocking.
+/// For FIFO-based jobservers, it opens the named pipe. After setup, it drains
+/// all available tokens from the jobserver to determine the total number of
+/// available jobs (`NumJobs`), then immediately releases them back.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {
+ switch (Config.TheMode) {
+ case JobserverConfig::PosixPipe: {
+ // Duplicate the read and write file descriptors.
+ int NewReadFD = ::dup(Config.PipeFDs.Read);
+ if (NewReadFD < 0)
+ return;
+ int NewWriteFD = ::dup(Config.PipeFDs.Write);
+ if (NewWriteFD < 0) {
+ ::close(NewReadFD);
+ return;
+ }
+ // Set the new descriptors to be closed automatically on exec().
+ if (::fcntl(NewReadFD, F_SETFD, FD_CLOEXEC) == -1 ||
+ ::fcntl(NewWriteFD, F_SETFD, FD_CLOEXEC) == -1) {
+ ::close(NewReadFD);
+ ::close(NewWriteFD);
+ return;
+ }
+ // Set the read descriptor to non-blocking.
+ int Flags = ::fcntl(NewReadFD, F_GETFL, 0);
+ if (Flags == -1 || ::fcntl(NewReadFD, F_SETFL, Flags | O_NONBLOCK) == -1) {
+ ::close(NewReadFD);
+ ::close(NewWriteFD);
+ return;
+ }
+ ReadFD = NewReadFD;
+ WriteFD = NewWriteFD;
+ break;
+ }
+ case JobserverConfig::PosixFifo:
+ // Open the FIFO for reading. It must be non-blocking and close-on-exec.
+ ReadFD = ::open(Config.Path.c_str(), O_RDONLY | O_NONBLOCK | O_CLOEXEC);
+ if (ReadFD < 0 || !isFifo(ReadFD)) {
+ if (ReadFD >= 0)
+ ::close(ReadFD);
+ ReadFD = -1;
+ return;
+ }
+ FifoPath = Config.Path;
+ // The write FD is opened on-demand in release().
+ WriteFD = -1;
+ break;
+ default:
+ return;
+ }
+
+ IsInitialized = true;
+ // Determine the total number of jobs by acquiring all available slots and
+ // then immediately releasing them.
+ SmallVector<JobSlot, 8> Slots;
+ while (true) {
+ auto S = tryAcquire();
+ if (!S.isValid())
+ break;
+ Slots.push_back(std::move(S));
+ }
+ NumJobs = Slots.size();
+ assert(NumJobs >= 1 && "Invalid number of jobs");
+ for (auto &S : Slots)
+ release(std::move(S));
+}
+
+/// The destructor closes any open file descriptors.
+JobserverClientImpl::~JobserverClientImpl() {
+ if (ReadFD >= 0)
+ ::close(ReadFD);
+ if (WriteFD >= 0)
+ ::close(WriteFD);
+}
+
+/// Tries to acquire a job slot. The first call to this function will always
+/// successfully acquire the single "implicit" slot that is granted to every
+/// process started by `make`. Subsequent calls attempt to read a one-byte
+/// token from the jobserver's read pipe. A successful read grants one
+/// explicit job slot. The read is non-blocking; if no token is available,
+/// it fails and returns an invalid JobSlot.
+JobSlot JobserverClientImpl::tryAcquire() {
+ if (!IsInitialized)
+ return JobSlot();
+
+ // The first acquisition is always for the implicit slot.
+ if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) {
+ LLVM_DEBUG(dbgs() << "Acquired implicit job slot.\n");
+ return JobSlot::createImplicit();
+ }
+
+ char Token;
+ ssize_t Ret;
+ LLVM_DEBUG(dbgs() << "Attempting to read token from FD " << ReadFD << ".\n");
+ // Loop to retry on EINTR (interrupted system call).
+ do {
+ Ret = ::read(ReadFD, &Token, 1);
+ } while (Ret < 0 && errno == EINTR);
+
+ if (Ret == 1) {
+ LLVM_DEBUG(dbgs() << "Acquired explicit token '" << Token << "'.\n");
+ return JobSlot::createExplicit(static_cast<uint8_t>(Token));
+ }
+
+ LLVM_DEBUG(dbgs() << "Failed to acquire job slot, read returned " << Ret
+ << ".\n");
+ return JobSlot();
+}
+
+/// Releases a job slot back to the pool. If the slot is implicit, it simply
+/// resets a flag. If the slot is explicit, it writes the character token
+/// associated with the slot back into the jobserver's write pipe. For FIFO
+/// jobservers, this may require opening the FIFO for writing if it hasn't
+/// been already.
+void JobserverClientImpl::release(JobSlot Slot) {
+ if (!Slot.isValid())
+ return;
+
+ // Releasing the implicit slot just makes it available for the next acquire.
+ if (Slot.isImplicit()) {
+ LLVM_DEBUG(dbgs() << "Released implicit job slot.\n");
+ [[maybe_unused]] bool WasAlreadyReleased =
+ HasImplicitSlot.exchange(true, std::memory_order_release);
+ assert(!WasAlreadyReleased && "Implicit slot released twice");
+ return;
+ }
+
+ uint8_t Token = Slot.getExplicitValue();
+ LLVM_DEBUG(dbgs() << "Releasing explicit token '" << (char)Token << "' to FD "
+ << WriteFD << ".\n");
+
+ // For FIFO-based jobservers, the write FD might not be open yet.
+ // Open it on the first release.
+ if (WriteFD < 0) {
+ LLVM_DEBUG(dbgs() << "WriteFD is invalid, opening FIFO: " << FifoPath
+ << "\n");
+ WriteFD = ::open(FifoPath.c_str(), O_WRONLY | O_CLOEXEC);
+ if (WriteFD < 0) {
+ LLVM_DEBUG(dbgs() << "Failed to open FIFO for writing.\n");
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "Opened FIFO as new WriteFD: " << WriteFD << "\n");
+ }
+
+ ssize_t Written;
+ // Loop to retry on EINTR (interrupted system call).
+ do {
+ Written = ::write(WriteFD, &Token, 1);
+ } while (Written < 0 && errno == EINTR);
+
+ if (Written <= 0) {
+ LLVM_DEBUG(dbgs() << "Failed to write token to pipe, write returned "
+ << Written << "\n");
+ }
+}
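A short, illustrative usage sketch for the client interface exercised above (getInstance, tryAcquire, release). doWork() is a hypothetical work item, and the pointer-like return of getInstance() is assumed from its use in Threading.cpp; this is not part of the patch.

#include "llvm/Support/Jobserver.h"
#include <utility>

void doWork(); // hypothetical task body

void runTaskWithJobserver() {
  auto JS = llvm::JobserverClient::getInstance();
  if (!JS) {
    // No jobserver advertised by the parent make; just run the task.
    doWork();
    return;
  }
  llvm::JobSlot Slot = JS->tryAcquire(); // first call yields the implicit slot
  if (!Slot.isValid())
    return; // no token available right now; the caller may retry later
  doWork();
  JS->release(std::move(Slot)); // hand the token (or implicit slot) back
}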
diff --git a/llvm/lib/Support/Windows/Jobserver.inc b/llvm/lib/Support/Windows/Jobserver.inc
new file mode 100644
index 0000000..79028ee
--- /dev/null
+++ b/llvm/lib/Support/Windows/Jobserver.inc
@@ -0,0 +1,79 @@
+//==- llvm/Support/Windows/Jobserver.inc - Windows Jobserver Impl -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows-specific parts of the JobserverClient class.
+// On Windows, the jobserver is implemented using a named semaphore.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Windows/WindowsSupport.h"
+#include <atomic>
+#include <cassert>
+
+namespace llvm {
+/// The constructor for the Windows jobserver client. It attempts to open a
+/// handle to an existing named semaphore, the name of which is provided by
+/// GNU make in the --jobserver-auth argument. If the semaphore is opened
+/// successfully, the client is marked as initialized.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {
+ Semaphore = (void *)::OpenSemaphoreA(SEMAPHORE_MODIFY_STATE | SYNCHRONIZE,
+ FALSE, Config.Path.c_str());
+ if (Semaphore != nullptr)
+ IsInitialized = true;
+}
+
+/// The destructor closes the handle to the semaphore, releasing the resource.
+JobserverClientImpl::~JobserverClientImpl() {
+ if (Semaphore != nullptr)
+ ::CloseHandle((HANDLE)Semaphore);
+}
+
+/// Tries to acquire a job slot. The first call always returns the implicit
+/// slot. Subsequent calls use a non-blocking wait on the semaphore
+/// (`WaitForSingleObject` with a timeout of 0). If the wait succeeds, the
+/// semaphore's count is decremented, and an explicit job slot is acquired.
+/// If the wait times out, it means no slots are available, and an invalid
+/// slot is returned.
+JobSlot JobserverClientImpl::tryAcquire() {
+ if (!IsInitialized)
+ return JobSlot();
+
+ // First, grant the implicit slot.
+ if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) {
+ return JobSlot::createImplicit();
+ }
+
+ // Try to acquire a slot from the semaphore without blocking.
+ if (::WaitForSingleObject((HANDLE)Semaphore, 0) == WAIT_OBJECT_0) {
+ // The explicit token value is arbitrary on Windows, as the semaphore
+ // count is the real resource.
+ return JobSlot::createExplicit(1);
+ }
+
+ return JobSlot(); // Invalid slot
+}
+
+/// Releases a job slot back to the pool. If the slot is implicit, it simply
+/// resets a flag. For an explicit slot, it increments the semaphore's count
+/// by one using `ReleaseSemaphore`, making the slot available to other
+/// processes.
+void JobserverClientImpl::release(JobSlot Slot) {
+ if (!IsInitialized || !Slot.isValid())
+ return;
+
+ if (Slot.isImplicit()) {
+ [[maybe_unused]] bool WasAlreadyReleased =
+ HasImplicitSlot.exchange(true, std::memory_order_release);
+ assert(!WasAlreadyReleased && "Implicit slot released twice");
+ return;
+ }
+
+ // Release the slot by incrementing the semaphore count.
+ (void)::ReleaseSemaphore((HANDLE)Semaphore, 1, NULL);
+}
+} // namespace llvm
diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp
index de0c4c9..3ba2c6c 100644
--- a/llvm/lib/TableGen/Error.cpp
+++ b/llvm/lib/TableGen/Error.cpp
@@ -19,10 +19,10 @@
#include "llvm/TableGen/Record.h"
#include <cstdlib>
-namespace llvm {
+using namespace llvm;
-SourceMgr SrcMgr;
-unsigned ErrorsPrinted = 0;
+SourceMgr llvm::SrcMgr;
+unsigned llvm::ErrorsPrinted = 0;
static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind,
const Twine &Msg) {
@@ -49,118 +49,118 @@ static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind,
// Functions to print notes.
-void PrintNote(const Twine &Msg) {
- WithColor::note() << Msg << "\n";
-}
+void llvm::PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; }
-void PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintMsg(WithColor::note());
}
-void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+void llvm::PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg);
}
// Functions to print fatal notes.
-void PrintFatalNote(const Twine &Msg) {
+void llvm::PrintFatalNote(const Twine &Msg) {
PrintNote(Msg);
fatal_exit();
}
-void PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+void llvm::PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintNote(NoteLoc, Msg);
fatal_exit();
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintFatalNote(const Record *Rec, const Twine &Msg) {
+void llvm::PrintFatalNote(const Record *Rec, const Twine &Msg) {
PrintNote(Rec->getLoc(), Msg);
fatal_exit();
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
PrintNote(RecVal->getLoc(), Msg);
fatal_exit();
}
// Functions to print warnings.
-void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; }
+void llvm::PrintWarning(const Twine &Msg) {
+ WithColor::warning() << Msg << "\n";
+}
-void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
+void llvm::PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
-void PrintWarning(const char *Loc, const Twine &Msg) {
+void llvm::PrintWarning(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg);
}
// Functions to print errors.
-void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
+void llvm::PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
-void PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintMsg(WithColor::error());
}
-void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+void llvm::PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
}
-void PrintError(const char *Loc, const Twine &Msg) {
+void llvm::PrintError(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintError(const Record *Rec, const Twine &Msg) {
+void llvm::PrintError(const Record *Rec, const Twine &Msg) {
PrintMessage(Rec->getLoc(), SourceMgr::DK_Error, Msg);
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintError(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintError(const RecordVal *RecVal, const Twine &Msg) {
PrintMessage(RecVal->getLoc(), SourceMgr::DK_Error, Msg);
}
// Functions to print fatal errors.
-void PrintFatalError(const Twine &Msg) {
+void llvm::PrintFatalError(const Twine &Msg) {
PrintError(Msg);
fatal_exit();
}
-void PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintError(PrintMsg);
fatal_exit();
}
-void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+void llvm::PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintError(ErrorLoc, Msg);
fatal_exit();
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintFatalError(const Record *Rec, const Twine &Msg) {
+void llvm::PrintFatalError(const Record *Rec, const Twine &Msg) {
PrintError(Rec->getLoc(), Msg);
fatal_exit();
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
PrintError(RecVal->getLoc(), Msg);
fatal_exit();
}
// Check an assertion: Obtain the condition value and be sure it is true.
// If not, print a nonfatal error along with the message.
-bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
+bool llvm::CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
auto *CondValue = dyn_cast_or_null<IntInit>(Condition->convertInitializerTo(
IntRecTy::get(Condition->getRecordKeeper())));
if (!CondValue) {
@@ -178,11 +178,9 @@ bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
}
// Dump a message to stderr.
-void dumpMessage(SMLoc Loc, const Init *Message) {
+void llvm::dumpMessage(SMLoc Loc, const Init *Message) {
if (auto *MessageInit = dyn_cast<StringInit>(Message))
PrintNote(Loc, MessageInit->getValue());
else
PrintError(Loc, "dump value is not of type string");
}
-
-} // end namespace llvm
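The mechanical pattern applied in this file (and in the TableGen changes that follow): drop the namespace llvm { ... } wrapper and define each entity with an explicit llvm:: qualifier, so a definition that does not match a declaration in the header is rejected by the compiler instead of silently introducing a new symbol. A minimal self-contained sketch with a hypothetical function; in the real file the declarations live in llvm/TableGen/Error.h.

#include "llvm/ADT/Twine.h"
#include "llvm/Support/WithColor.h"

namespace llvm {
void printSomething(const Twine &Msg); // stand-in for the header declaration
} // end namespace llvm

// Qualified definition, as used throughout this patch: it must agree with a
// prior declaration inside namespace llvm, or the file does not compile.
void llvm::printSomething(const Twine &Msg) {
  llvm::WithColor::note() << Msg << "\n";
}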
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index f545706..42043f7 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -64,14 +64,12 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
static cl::opt<bool>
TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
-namespace llvm {
-cl::opt<bool> EmitLongStrLiterals(
+cl::opt<bool> llvm::EmitLongStrLiterals(
"long-string-literals",
cl::desc("when emitting large string tables, prefer string literals over "
"comma-separated char literals. This can be a readability and "
"compile-time performance win, but upsets some compilers"),
cl::Hidden, cl::init(true));
-} // end namespace llvm
static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
"no-warn-on-unused-template-args",
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 051a896..2ea3a24 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -46,8 +46,7 @@ using namespace llvm;
// Context
//===----------------------------------------------------------------------===//
-namespace llvm {
-namespace detail {
+namespace llvm::detail {
/// This class represents the internal implementation of the RecordKeeper.
/// It contains all of the contextual static state of the Record classes. It is
/// kept out-of-line to simplify dependencies, and also make it easier for
@@ -100,8 +99,7 @@ struct RecordKeeperImpl {
void dumpAllocationStats(raw_ostream &OS) const;
};
-} // namespace detail
-} // namespace llvm
+} // namespace llvm::detail
void detail::RecordKeeperImpl::dumpAllocationStats(raw_ostream &OS) const {
// Dump memory allocation related stats.
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index f928ded..3d31d8e 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -31,8 +31,6 @@ using namespace llvm;
// Support Code for the Semantic Actions.
//===----------------------------------------------------------------------===//
-namespace llvm {
-
RecordsEntry::RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {}
RecordsEntry::RecordsEntry(std::unique_ptr<ForeachLoop> Loop)
: Loop(std::move(Loop)) {}
@@ -41,6 +39,7 @@ RecordsEntry::RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion)
RecordsEntry::RecordsEntry(std::unique_ptr<Record::DumpInfo> Dump)
: Dump(std::move(Dump)) {}
+namespace llvm {
struct SubClassReference {
SMRange RefRange;
const Record *Rec = nullptr;
@@ -61,6 +60,7 @@ struct SubMultiClassReference {
bool isInvalid() const { return MC == nullptr; }
void dump() const;
};
+} // end namespace llvm
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
@@ -74,8 +74,6 @@ LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
}
#endif
-} // end namespace llvm
-
static bool checkBitsConcrete(Record &R, const RecordVal &RV) {
const auto *BV = cast<BitsInit>(RV.getValue());
for (unsigned i = 0, e = BV->getNumBits(); i != e; ++i) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 70d5ad7d..dc8e7c8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16461,7 +16461,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
- DAG.getConstant(Cnt, DL, MVT::i32));
+ DAG.getTargetConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
@@ -16491,7 +16491,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
- DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
+ DAG.getTargetConstant(Cnt, DL, MVT::i32),
+ Op->getFlags());
}
// Right shift register. Note, there is not a shift right register
@@ -19973,7 +19974,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
- Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
+ Op->getOperand(0), DAG.getTargetConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
@@ -20696,7 +20697,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100);
SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, DL, VT, N000, N100);
SDValue NewShiftConstant =
- DAG.getConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
+ DAG.getTargetConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant);
}
@@ -22373,14 +22374,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
Op = DAG.getNode(Opcode, DL, VT, Op,
- DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32));
+ DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32, true));
if (N->getValueType(0) == MVT::i64)
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
DAG.getConstant(0, DL, MVT::i64));
return Op;
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
Op = DAG.getNode(Opcode, DL, VT, Op,
- DAG.getConstant(ShiftAmount, DL, MVT::i32));
+ DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));
if (N->getValueType(0) == MVT::i64)
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
DAG.getConstant(0, DL, MVT::i64));
@@ -23198,7 +23199,7 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) {
Op.getOperand(ExtOffset == 0 ? 0 : 1));
if (Shift != 0)
BC = DAG.getNode(AArch64ISD::VLSHR, DL, VT, BC,
- DAG.getConstant(Shift, DL, MVT::i32));
+ DAG.getTargetConstant(Shift, DL, MVT::i32));
return DAG.getNode(ISD::AND, DL, VT, BC, DAG.getConstant(Mask, DL, VT));
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6ef0a95..09ce713 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -812,49 +812,49 @@ def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>;
def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>;
def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>;
-def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
let EncoderMethod = "getVecShiftR8OpValue";
let DecoderMethod = "DecodeVecShiftR8Imm";
let ParserMatchClass = Imm1_8Operand;
}
-def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let EncoderMethod = "getVecShiftR16OpValue";
let DecoderMethod = "DecodeVecShiftR16Imm";
let ParserMatchClass = Imm1_16Operand;
}
-def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR16Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
let EncoderMethod = "getVecShiftR16OpValue";
let DecoderMethod = "DecodeVecShiftR16ImmNarrow";
let ParserMatchClass = Imm1_8Operand;
}
-def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
}]> {
let EncoderMethod = "getVecShiftR32OpValue";
let DecoderMethod = "DecodeVecShiftR32Imm";
let ParserMatchClass = Imm1_32Operand;
}
-def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR32Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let EncoderMethod = "getVecShiftR32OpValue";
let DecoderMethod = "DecodeVecShiftR32ImmNarrow";
let ParserMatchClass = Imm1_16Operand;
}
-def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
}]> {
let EncoderMethod = "getVecShiftR64OpValue";
let DecoderMethod = "DecodeVecShiftR64Imm";
let ParserMatchClass = Imm1_64Operand;
}
-def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR64Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
}]> {
let EncoderMethod = "getVecShiftR64OpValue";
@@ -862,37 +862,6 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand;
}
-// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant
-// (ImmLeaf)
-def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
-}]> {
- let EncoderMethod = "getVecShiftR8OpValue";
- let DecoderMethod = "DecodeVecShiftR8Imm";
- let ParserMatchClass = Imm1_8Operand;
-}
-def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
-}]> {
- let EncoderMethod = "getVecShiftR16OpValue";
- let DecoderMethod = "DecodeVecShiftR16Imm";
- let ParserMatchClass = Imm1_16Operand;
-}
-def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
-}]> {
- let EncoderMethod = "getVecShiftR32OpValue";
- let DecoderMethod = "DecodeVecShiftR32Imm";
- let ParserMatchClass = Imm1_32Operand;
-}
-def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
-}]> {
- let EncoderMethod = "getVecShiftR64OpValue";
- let DecoderMethod = "DecodeVecShiftR64Imm";
- let ParserMatchClass = Imm1_64Operand;
-}
-
def Imm0_0Operand : AsmImmRange<0, 0>;
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm1_1Operand : AsmImmRange<1, 1>;
@@ -904,28 +873,28 @@ def Imm0_15Operand : AsmImmRange<0, 15>;
def Imm0_31Operand : AsmImmRange<0, 31>;
def Imm0_63Operand : AsmImmRange<0, 63>;
-def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 8);
}]> {
let EncoderMethod = "getVecShiftL8OpValue";
let DecoderMethod = "DecodeVecShiftL8Imm";
let ParserMatchClass = Imm0_7Operand;
}
-def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 16);
}]> {
let EncoderMethod = "getVecShiftL16OpValue";
let DecoderMethod = "DecodeVecShiftL16Imm";
let ParserMatchClass = Imm0_15Operand;
}
-def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 32);
}]> {
let EncoderMethod = "getVecShiftL32OpValue";
let DecoderMethod = "DecodeVecShiftL32Imm";
let ParserMatchClass = Imm0_31Operand;
}
-def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 64);
}]> {
let EncoderMethod = "getVecShiftL64OpValue";
@@ -933,36 +902,6 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63Operand;
}
-// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant
-// (ImmLeaf)
-def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 8);
-}]> {
- let EncoderMethod = "getVecShiftL8OpValue";
- let DecoderMethod = "DecodeVecShiftL8Imm";
- let ParserMatchClass = Imm0_7Operand;
-}
-def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 16);
-}]> {
- let EncoderMethod = "getVecShiftL16OpValue";
- let DecoderMethod = "DecodeVecShiftL16Imm";
- let ParserMatchClass = Imm0_15Operand;
-}
-def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 32);
-}]> {
- let EncoderMethod = "getVecShiftL32OpValue";
- let DecoderMethod = "DecodeVecShiftL32Imm";
- let ParserMatchClass = Imm0_31Operand;
-}
-def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 64);
-}]> {
- let EncoderMethod = "getVecShiftL64OpValue";
- let DecoderMethod = "DecodeVecShiftL64Imm";
- let ParserMatchClass = Imm0_63Operand;
-}
// Crazy immediate formats used by 32-bit and 64-bit logical immediate
// instructions for splatting repeating bit patterns across the immediate.
@@ -10232,7 +10171,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
V64, V64, vecshiftR16,
asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10240,15 +10179,16 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
V128, V128, vecshiftR16,
asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
} // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10256,7 +10196,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
V128, V128, vecshiftR32,
asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10264,7 +10204,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
V128, V128, vecshiftR64,
asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 vecshiftR64:$imm)))]> {
bits<6> imm;
let Inst{21-16} = imm;
}
@@ -10276,7 +10216,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
V64, V64, vecshiftR16,
asm, ".4h", ".4h",
- [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10284,7 +10224,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
V128, V128, vecshiftR16,
asm, ".8h", ".8h",
- [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10293,7 +10233,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
- [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10301,7 +10241,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
V128, V128, vecshiftR32,
asm, ".4s", ".4s",
- [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10309,7 +10249,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
V128, V128, vecshiftR64,
asm, ".2d", ".2d",
- [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 vecshiftR64:$imm)))]> {
bits<6> imm;
let Inst{21-16} = imm;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 36c9cb6..bc6b931 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1010,6 +1010,36 @@ let Predicates = [HasSVE_or_SME] in {
defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>;
+
+ // mul x (splat -1) -> neg x
+ def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_B $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_H $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_S $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))),
+ (NEG_ZPmZ_D $Op2, $Op1, $Op2)>;
+
+ let AddedComplexity = 5 in {
+ def : Pat<(nxv16i8 (AArch64mul_p nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_B_UNDEF $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv8i16 (AArch64mul_p nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_H_UNDEF $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (AArch64mul_p nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_S_UNDEF $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (AArch64mul_p nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))),
+ (NEG_ZPmZ_D_UNDEF $Op2, $Op1, $Op2)>;
+ }
+
+ def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, (nxv16i8 (splat_vector (i32 -1))), nxv16i8:$Op2)),
+ (NEG_ZPmZ_B (DUP_ZI_B -1, 0), $Op1, $Op2)>;
+ def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, (nxv8i16 (splat_vector (i32 -1))), nxv8i16:$Op2)),
+ (NEG_ZPmZ_H (DUP_ZI_H -1, 0), $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, (nxv4i32 (splat_vector (i32 -1))), nxv4i32:$Op2)),
+ (NEG_ZPmZ_S (DUP_ZI_S -1, 0), $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, (nxv2i64 (splat_vector (i64 -1))), nxv2i64:$Op2)),
+ (NEG_ZPmZ_D (DUP_ZI_D -1, 0), $Op1, $Op2)>;
} // End HasSVE_or_SME
// COMPACT - word and doubleword
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 96cc3f3..3e55b76 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2957,9 +2957,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
// Need special instructions for atomics that affect ordering.
- if (Order != AtomicOrdering::NotAtomic &&
- Order != AtomicOrdering::Unordered &&
- Order != AtomicOrdering::Monotonic) {
+ if (isStrongerThanMonotonic(Order)) {
assert(!isa<GZExtLoad>(LdSt));
assert(MemSizeInBytes <= 8 &&
"128-bit atomics should already be custom-legalized");
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 6025f1c..63313da 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -556,8 +556,7 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
unsigned NewOpc =
Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
MachineIRBuilder MIB(MI);
- auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
- MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
+ MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
MI.eraseFromParent();
}
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 539470d..be44b8f 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -4967,7 +4967,7 @@ multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
//===----------------------------------------------------------------------===//
// SME2 multi-vec saturating shift right narrow
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<4> imm4;
@@ -4985,7 +4985,7 @@ class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> {
def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
- def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+ def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
@@ -5008,20 +5008,20 @@ class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
}
multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32,
+ def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32,
mnemonic>{
bits<5> imm;
let Inst{20-16} = imm;
}
- def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64,
+ def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64,
mnemonic> {
bits<6> imm;
let Inst{22} = imm{5};
let Inst{20-16} = imm{4-0};
}
- def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>;
- def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, vecshiftR32>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, vecshiftR64>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 9a23c35..3cdd505 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4436,9 +4436,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
ZPR64, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4481,10 +4481,10 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
@@ -4501,10 +4501,10 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
@@ -4546,10 +4546,10 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
@@ -4676,18 +4676,18 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
- tvecshiftR8>;
+ vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
- tvecshiftR16> {
+ vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@@ -4717,18 +4717,18 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
- tvecshiftR8>;
+ vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
- tvecshiftR16> {
+ vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
@@ -5461,10 +5461,10 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -6443,10 +6443,10 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>;
}
// As above but shift amount takes the form of a "vector immediate".
@@ -6460,15 +6460,15 @@ multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
}
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
- def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
- def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
- def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
- def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
+ def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftL8, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftL16, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftL32, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftL64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
@@ -6489,10 +6489,10 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
// As above but shift amount takes the form of a "vector immediate".
@@ -6511,10 +6511,10 @@ multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op =
def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -10031,7 +10031,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte
// SVE2 multi-vec shift narrow
class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<5> Zd;
@@ -10055,7 +10055,7 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
- def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+ def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7003a40..9446144 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2126,6 +2126,8 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureLdsBarrierArriveAtomic,
FeatureSetPrioIncWgInst,
Feature45BitNumRecordsBufferResource,
+ FeatureSupportsXNACK,
+ FeatureXNACK,
]>;
def FeatureISAVersion12_51 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 2ba3156..9dd64e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
-/// Returns true if the function requires the implicit argument be passed
-/// regardless of the function contents.
-static bool funcRequiresHostcallPtr(const Function &F) {
- // Sanitizers require the hostcall buffer passed in the implicit arguments.
+/// Returns true if sanitizer attributes are present on a function.
+static bool hasSanitizerAttributes(const Function &F) {
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
F.hasFnAttribute(Attribute::SanitizeMemory) ||
@@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// If the function requires the implicit arg pointer due to sanitizers,
// assume it's needed even if explicitly marked as not requiring it.
- const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
- if (NeedsHostcall) {
+ // Flat scratch initialization is needed because `asan_malloc_impl`
+ // calls introduced later in the pipeline will have flat scratch accesses.
+ // FIXME: FLAT_SCRATCH_INIT will not be required here if the device-libs
+ // implementation of `asan_malloc_impl` is updated.
+ const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
+ if (HasSanitizerAttrs) {
removeAssumedBits(IMPLICIT_ARG_PTR);
removeAssumedBits(HOSTCALL_PTR);
+ removeAssumedBits(FLAT_SCRATCH_INIT);
}
for (auto Attr : ImplicitAttrs) {
- if (NeedsHostcall &&
- (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
+ if (HasSanitizerAttrs &&
+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
+ Attr.first == FLAT_SCRATCH_INIT))
continue;
if (F->hasFnAttribute(Attr.second))
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 2d5ae29..2120bf8 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -2303,7 +2303,10 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
if (!hasArchitectedFlatScratch())
KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
- KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
+ bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
+ assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
+ KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
+ << '\n';
KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index fed3778..90c828b 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -722,7 +722,8 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
return false;
}
- if (!MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
+ if (New->getReg().isVirtual() &&
+ !MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
LLVM_DEBUG(dbgs() << "Cannot constrain " << printReg(New->getReg(), TRI)
<< TRI->getRegClassName(ConstrainRC) << '\n');
return false;
@@ -931,7 +932,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
SubDef && TII.isFoldableCopy(*SubDef);
SubDef = MRI.getVRegDef(Sub->getReg())) {
- MachineOperand &SrcOp = SubDef->getOperand(1);
+ unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
+ MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);
+
if (SrcOp.isImm())
return &SrcOp;
if (!SrcOp.isReg() || SrcOp.getReg().isPhysical())
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index e4b3528..0189e7b 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -306,7 +306,8 @@ class PrologEpilogSGPRSpillBuilder {
buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
TmpVGPR, FI, FrameReg, DwordOff);
- MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
+ assert(SubReg.isPhysical());
+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
.addReg(TmpVGPR, RegState::Kill);
DwordOff += 4;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f7265c5..e233457 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
return Flags;
}
-bool SITargetLowering::checkForPhysRegDependency(
- SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const {
- if (User->getOpcode() != ISD::CopyToReg)
- return false;
- if (!Def->isMachineOpcode())
- return false;
- MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def);
- if (!MDef)
- return false;
-
- unsigned ResNo = User->getOperand(Op).getResNo();
- if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1)
- return false;
- const MCInstrDesc &II = TII->get(MDef->getMachineOpcode());
- if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) {
- PhysReg = AMDGPU::SCC;
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
- Cost = RC->getCopyCost();
- return true;
- }
- return false;
-}
-
void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Instruction *AI) const {
// Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a474dab..74e58f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -561,11 +561,6 @@ public:
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
- bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- MCRegister &PhysReg, int &Cost) const override;
-
bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 56435a5..46757cf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2112,8 +2112,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
MI.setDesc(get(AMDGPU::V_READLANE_B32));
- MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
- &AMDGPU::SReg_32_XM0RegClass);
break;
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
@@ -3435,6 +3433,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
}
}
+unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_MOV_B16_t16_e32:
+ case AMDGPU::V_MOV_B16_t16_e64:
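+    // The 16-bit movs keep their source at operand 2 rather than operand 1.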
+ return 2;
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B64_IMM_PSEUDO:
+ case AMDGPU::COPY:
+ case AMDGPU::WWM_COPY:
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
+ case AMDGPU::V_ACCVGPR_READ_B32_e64:
+ case AMDGPU::V_ACCVGPR_MOV_B32:
+ case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
+ case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
+ return 1;
+ default:
+ llvm_unreachable("MI is not a foldable copy");
+ }
+}
+
static constexpr AMDGPU::OpName ModifierOpNames[] = {
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
@@ -8117,21 +8141,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
// hope for the best.
if (Inst.isCopy() && DstReg.isPhysical() &&
RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
- // TODO: Only works for 32 bit registers.
- if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
- .add(Inst.getOperand(1));
- } else {
- Register NewDst =
- MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
- .add(Inst.getOperand(1));
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
- DstReg)
- .addReg(NewDst);
- }
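+      // Read the first lane into a fresh 32-bit SGPR, then copy it into the
+      // physical destination register.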
+ Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
+ .add(Inst.getOperand(1));
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
+ DstReg)
+ .addReg(NewDst);
+
Inst.eraseFromParent();
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index a21089f..cc59acf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -417,6 +417,7 @@ public:
const MachineInstr &MIb) const override;
static bool isFoldableCopy(const MachineInstr &MI);
+ static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
void removeModOperands(MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 205237f..3c2dd42 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2222,8 +2222,6 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
// Don't need to write VGPR out.
}
- MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
-
// Restore clobbered registers in the specified restore block.
MI = RestoreMBB.end();
SB.setMI(&RestoreMBB, MI);
@@ -2238,7 +2236,8 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
SB.NumSubRegs == 1
? SB.SuperReg
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
- MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
+
+ assert(SubReg.isPhysical());
bool LastSubReg = (i + 1 == e);
auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
SubReg)
@@ -3059,8 +3058,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (IsSALU && LiveSCC) {
Register NewDest;
if (IsCopy) {
- MF->getRegInfo().constrainRegClass(ResultReg,
- &AMDGPU::SReg_32_XM0RegClass);
+ assert(ResultReg.isPhysical());
NewDest = ResultReg;
} else {
NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
@@ -3190,8 +3188,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Register NewDest;
if (IsCopy) {
- MF->getRegInfo().constrainRegClass(ResultReg,
- &AMDGPU::SReg_32_XM0RegClass);
NewDest = ResultReg;
} else {
NewDest = RS->scavengeRegisterBackwards(
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 5630580..f98e312 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -367,19 +367,6 @@ def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
let BaseClassOrder = 10000;
}
-def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
- let CopyCost = 1;
- let isAllocatable = 0;
- let HasSGPR = 1;
-}
-
-def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> {
- let CopyCost = 1;
- let Size = 16;
- let isAllocatable = 0;
- let HasSGPR = 1;
-}
-
// TODO: Do we need to set DwarfRegAlias on register tuples?
def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
@@ -797,7 +784,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16,
- EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
+ EXEC_LO_LO16, EXEC_HI_LO16, M0_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
let Size = 16;
let isAllocatable = 0;
@@ -805,7 +792,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
}
def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
- (add SReg_32_XM0_XEXEC, M0_CLASS)> {
+ (add SReg_32_XM0_XEXEC, M0)> {
let AllocationPriority = 0;
}
@@ -830,7 +817,7 @@ def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32,
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
- (add SReg_32_XM0, M0_CLASS)> {
+ (add SReg_32_XM0, M0)> {
let AllocationPriority = 0;
let HasSGPR = 1;
let BaseClassOrder = 32;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index d0dfa47..a94e131 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
@@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
// Boolean vectors.
@@ -449,6 +453,7 @@ HexagonTargetLowering::initializeHVXLowering() {
      // Include cases which are not handled earlier
setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v32i1, Custom);
setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
}
@@ -2337,7 +2342,7 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
return ExpandHvxFpToInt(Op, DAG);
}
-// For vector type v32i1 uint_to_fp to v32f32:
+// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
// R1 = #1, R2 holds the v32i1 param
// V1 = vsplat(R1)
// V2 = vsplat(R2)
@@ -2464,7 +2469,7 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
MVT FpTy = ResTy.getVectorElementType();
- if (Op.getOpcode() == ISD::UINT_TO_FP) {
+ if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
return LowerHvxPred32ToFp(Op, DAG);
if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4cfbfca..7ddf996 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2860,8 +2860,7 @@ static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
EVT ResTy, unsigned first) {
unsigned NumElts = ResTy.getVectorNumElements();
- assert(first >= 0 &&
- first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
+ assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
Node->op_begin() + first + NumElts);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 395d2c4..662d3f6 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -629,7 +629,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
- G_FTANH})
+ G_FTANH, G_FMODF})
.libcallFor({s32, s64})
.libcallFor(ST.is64Bit(), {s128});
getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 7d4535a..b37b740 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
    // If it's not a grouped vector register, it doesn't have a subregister, so
// the base register is just itself.
- if (BaseReg == RISCV::NoRegister)
+ if (!BaseReg.isValid())
BaseReg = Reg;
return BaseReg;
}
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index cf6f83a..7f5d0af 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -126,13 +126,6 @@ let Predicates = [HasAtomicLdSt, IsRV64] in {
// RV64 i32 patterns not used by SelectionDAG
//===----------------------------------------------------------------------===//
-def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>;
-
-def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{
- KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0);
- return Known.isNonNegative();
-}]>;
-
let Predicates = [IsRV64] in {
def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
def : LdPat<extloadi16, LH, i32>;
@@ -140,15 +133,10 @@ def : LdPat<extloadi16, LH, i32>;
def : StPat<truncstorei8, SB, GPR, i32>;
def : StPat<truncstorei16, SH, GPR, i32>;
-def : Pat<(anyext (i32 GPR:$src)), (COPY GPR:$src)>;
def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
-def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>;
def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32),
(ADDIW GPR:$rs1, simm12_lo:$imm)>;
-
-// Use sext if the sign bit of the input is 0.
-def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
}
let Predicates = [IsRV64, NoStdExtZba] in
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 50649cf..dcce2d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -533,7 +533,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
- ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
+ ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP, ISD::FMODF},
MVT::f16, Promote);
// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6a6ead2..cf8d120 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
// All undefined passthrus should be $noreg: see
// RISCVDAGToDAGISel::doPeepholeNoRegPassThru
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
+ return !UseMO.getReg().isValid() || UseMO.isUndef();
}
/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
@@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
Register Reg = VLOp.getReg();
// Erase the AVL operand from the instruction.
- VLOp.setReg(RISCV::NoRegister);
+ VLOp.setReg(Register());
VLOp.setIsKill(false);
if (LIS) {
LiveInterval &LI = LIS->getInterval(Reg);
@@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
if (!MO.isReg() || !MO.getReg().isVirtual())
return;
Register OldVLReg = MO.getReg();
- MO.setReg(RISCV::NoRegister);
+ MO.setReg(Register());
if (LIS)
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1e6b04f8..7db4832 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
/*RestoreAfter=*/false, /*SpAdj=*/0,
/*AllowSpill=*/false);
- if (TmpGPR != RISCV::NoRegister)
+ if (TmpGPR.isValid())
RS->setRegUsed(TmpGPR);
else {
// The case when there is no scavenged register needs special handling.
@@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Invalid operand type for VL operand";
return false;
}
- if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
+ if (Op.isReg() && Op.getReg().isValid()) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
index 1674c95..1dd7332 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
@@ -26,7 +26,7 @@ class LAQ_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<0b00111, aq, rl, funct3, OPC_AMO,
- (outs ), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
+ (outs), (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
opcodestr, "$rs2, $rs1"> {
let rd = 0;
}
@@ -71,7 +71,7 @@ class PatLAQ<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
// while atomic_store has data, addr
class PatSRL<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
: Pat<(OpNode (vt GPR:$rs2), (XLenVT GPRMemZeroOffset:$rs1)),
- (Inst GPRMemZeroOffset:$rs1, GPR:$rs2)>;
+ (Inst GPR:$rs2, GPRMemZeroOffset:$rs1)>;
let Predicates = [HasStdExtZalasr] in {
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index f8d33ae..54569b1 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
const MachineOperand &MOImm = MI.getOperand(2);
if (!MOImm.isImm())
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
int64_t Offset = MOImm.getImm();
int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
@@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
}
}
}
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
}
// Check all uses after FirstMI of the given register, keeping a vector of
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index ffba284..fdf9a4f 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
Register FalseReg = MI.getOperand(2).getReg();
if (TruePassthruReg != FalseReg) {
// If True's passthru is undef see if we can change it to False
- if (TruePassthruReg != RISCV::NoRegister ||
+ if (TruePassthruReg.isValid() ||
!MRI->hasOneUse(MI.getOperand(3).getReg()) ||
!ensureDominates(MI.getOperand(2), *True))
return false;
@@ -467,7 +467,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) {
unsigned PassthruOpIdx = MI.getNumExplicitDefs();
if (HasPassthru) {
- if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+        if (MI.getOperand(PassthruOpIdx).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
} else
MI.removeOperand(PassthruOpIdx);
@@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A,
bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
MachineInstr &Src) const {
assert(MO.getParent()->getParent() == Src.getParent());
- if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
+ if (!MO.isReg() || !MO.getReg().isValid())
return true;
MachineInstr *Def = MRI->getVRegDef(MO.getReg());
@@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
return false;
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
return false;
// If the input was a pseudo with a policy operand, we can give it a tail
@@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
// Src needs to have the same passthru as VMV_V_V
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
- if (SrcPassthru.getReg() != RISCV::NoRegister &&
+ if (SrcPassthru.getReg().isValid() &&
SrcPassthru.getReg() != Passthru.getReg())
return false;
@@ -672,7 +672,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (SrcPassthru.getReg() != Passthru.getReg()) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
- if (Passthru.getReg() != RISCV::NoRegister)
+ if (Passthru.getReg().isValid())
MRI->constrainRegClass(
Passthru.getReg(),
TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI));
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
index 7505507..e8c849e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
@@ -188,8 +188,31 @@ class SPIRVLegalizePointerCast : public FunctionPass {
FixedVectorType *SrcType = cast<FixedVectorType>(Src->getType());
FixedVectorType *DstType =
cast<FixedVectorType>(GR->findDeducedElementType(Dst));
- assert(DstType->getNumElements() >= SrcType->getNumElements());
+  auto DstNumElements = DstType->getNumElements();
+  auto SrcNumElements = SrcType->getNumElements();
+
+  // If the element types differ, this is a bitcast.
+  if (DstType->getElementType() != SrcType->getElementType()) {
+    // Support bitcasts between vectors with different element counts only if
+    // the total bitwidth is the same.
+    [[maybe_unused]] auto DstBitWidth =
+        DstType->getElementType()->getScalarSizeInBits() * DstNumElements;
+    [[maybe_unused]] auto SrcBitWidth =
+        SrcType->getElementType()->getScalarSizeInBits() * SrcNumElements;
+    assert(DstBitWidth == SrcBitWidth &&
+           "Unsupported bitcast between vectors of different bitwidths.");
+
+ Src =
+ B.CreateIntrinsic(Intrinsic::spv_bitcast, {DstType, SrcType}, {Src});
+ buildAssignType(B, DstType, Src);
+ SrcType = DstType;
+
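+    // Store the bitcast value as-is; no element-count widening is needed.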
+ StoreInst *SI = B.CreateStore(Src, Dst);
+ SI->setAlignment(Alignment);
+ return SI;
+ }
+ assert(DstType->getNumElements() >= SrcType->getNumElements());
LoadInst *LI = B.CreateLoad(DstType, Dst);
LI->setAlignment(Alignment);
Value *OldValues = LI;
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 6c19049..024030d 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -206,8 +206,8 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (!done)
--I;
- // skip debug instruction
- if (I->isDebugInstr())
+ // Skip meta instructions.
+ if (I->isMetaInstruction())
continue;
if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() ||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3802506..931a10b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
- Mask = UnpackLoMask;
- else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
- Mask = UnpackHiMask;
+ if (!isSingleElementRepeatedMask(Mask)) {
+ if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
+ Mask = UnpackLoMask;
+ else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
+ Mask = UnpackHiMask;
+ }
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
@@ -58135,6 +58137,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
return V;
+  // Prefer VSHLI to reduce uses; X86FixupInstTunings may revert this depending
+ // on the scheduler model. Limit multiple users to AVX+ targets to prevent
+ // introducing extra register moves.
+ if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL))
+ if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode()))
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(),
+ Op0, 1, DAG);
+
// Canonicalize hidden LEA pattern:
// Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
// iff c < 4
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index ddb95a4..faeab95 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InterleavedRange.h"
+#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
"Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
"Number of functions that had clones created during ThinLTO backend");
+STATISTIC(
+ FunctionCloneDuplicatesThinBackend,
+ "Number of function clone duplicates detected during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
"cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
@@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
return Changed;
}
+// Compute a SHA1 hash of the callsite and alloc version information of clone I
+// in the summary, to use in detection of duplicate clones.
+uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) {
+ SHA1 Hasher;
+ // Update hash with any callsites that call non-default (non-zero) callee
+ // versions.
+ for (auto &SN : FS->callsites()) {
+ // In theory all callsites and allocs in this function should have the same
+ // number of clone entries, but handle any discrepancies gracefully below
+ // for NDEBUG builds.
+ assert(
+ SN.Clones.size() > I &&
+ "Callsite summary has fewer entries than other summaries in function");
+ if (SN.Clones.size() <= I || !SN.Clones[I])
+ continue;
+ uint8_t Data[sizeof(SN.Clones[I])];
+ support::endian::write32le(Data, SN.Clones[I]);
+ Hasher.update(Data);
+ }
+ // Update hash with any allocs that have non-default (non-None) hints.
+ for (auto &AN : FS->allocs()) {
+ // In theory all callsites and allocs in this function should have the same
+ // number of clone entries, but handle any discrepancies gracefully below
+ // for NDEBUG builds.
+ assert(AN.Versions.size() > I &&
+ "Alloc summary has fewer entries than other summaries in function");
+ if (AN.Versions.size() <= I ||
+ (AllocationType)AN.Versions[I] == AllocationType::None)
+ continue;
+ Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1));
+ }
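+  // Condense the SHA1 digest into a 64-bit key from its first 8 bytes.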
+ return support::endian::read64le(Hasher.result().data());
+}
+
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
- &FuncToAliasMap) {
+ &FuncToAliasMap,
+ FunctionSummary *FS) {
+ auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
+ // We might have created this when adjusting callsite in another
+ // function. It should be a declaration.
+ assert(DeclGV->isDeclaration());
+ NewGV->takeName(DeclGV);
+ DeclGV->replaceAllUsesWith(NewGV);
+ DeclGV->eraseFromParent();
+ };
+
+ // Handle aliases to this function, and create analogous alias clones to the
+ // provided clone of this function.
+ auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
+ if (!FuncToAliasMap.count(&F))
+ return;
+ for (auto *A : FuncToAliasMap[&F]) {
+ std::string AliasName = getMemProfFuncName(A->getName(), I);
+ auto *PrevA = M.getNamedAlias(AliasName);
+ auto *NewA = GlobalAlias::create(A->getValueType(),
+ A->getType()->getPointerAddressSpace(),
+ A->getLinkage(), AliasName, NewF);
+ NewA->copyAttributesFrom(A);
+ if (PrevA)
+ TakeDeclNameAndReplace(PrevA, NewA);
+ }
+ };
+
// The first "clone" is the original copy, we should only call this if we
// needed to create new clones.
assert(NumClones > 1);
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
VMaps.reserve(NumClones - 1);
FunctionsClonedThinBackend++;
+
+ // Map of hash of callsite/alloc versions to the instantiated function clone
+ // (possibly the original) implementing those calls. Used to avoid
+ // instantiating duplicate function clones.
+ // FIXME: Ideally the thin link would not generate such duplicate clones to
+ // start with, but right now it happens due to phase ordering in the function
+  // assignment and the additional clones it can produce. We simply make each
+  // duplicate an alias to the matching instantiated clone recorded in the
+  // map, except for available_externally functions, which are turned into
+  // declarations: they would be aliases in the prevailing module, and
+  // available_externally aliases are not well supported right now.
+ DenseMap<uint64_t, Function *> HashToFunc;
+
+ // Save the hash of the original function version.
+ HashToFunc[ComputeHash(FS, 0)] = &F;
+
for (unsigned I = 1; I < NumClones; I++) {
VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
+ std::string Name = getMemProfFuncName(F.getName(), I);
+ auto Hash = ComputeHash(FS, I);
+ // If this clone would duplicate a previously seen clone, don't generate the
+ // duplicate clone body, just make an alias to satisfy any (potentially
+ // cross-module) references.
+ if (HashToFunc.contains(Hash)) {
+ FunctionCloneDuplicatesThinBackend++;
+ auto *Func = HashToFunc[Hash];
+ if (Func->hasAvailableExternallyLinkage()) {
+ // Skip these as EliminateAvailableExternallyPass does not handle
+ // available_externally aliases correctly and we end up with an
+ // available_externally alias to a declaration. Just create a
+ // declaration for now as we know we will have a definition in another
+ // module.
+ auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone decl " << ore::NV("Decl", Decl.getCallee()));
+ continue;
+ }
+ auto *PrevF = M.getFunction(Name);
+ auto *Alias = GlobalAlias::create(Name, Func);
+ if (PrevF)
+ TakeDeclNameAndReplace(PrevF, Alias);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone alias " << ore::NV("Alias", Alias));
+
+ // Now handle aliases to this function, and clone those as well.
+ CloneFuncAliases(Func, I);
+ continue;
+ }
auto *NewF = CloneFunction(&F, *VMaps.back());
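+    // Record the new clone so any later duplicate can alias it instead.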
+ HashToFunc[Hash] = NewF;
FunctionClonesThinBackend++;
// Strip memprof and callsite metadata from clone as they are no longer
// needed.
@@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
}
}
- std::string Name = getMemProfFuncName(F.getName(), I);
auto *PrevF = M.getFunction(Name);
- if (PrevF) {
- // We might have created this when adjusting callsite in another
- // function. It should be a declaration.
- assert(PrevF->isDeclaration());
- NewF->takeName(PrevF);
- PrevF->replaceAllUsesWith(NewF);
- PrevF->eraseFromParent();
- } else
+ if (PrevF)
+ TakeDeclNameAndReplace(PrevF, NewF);
+ else
NewF->setName(Name);
updateSubprogramLinkageName(NewF, Name);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone " << ore::NV("NewFunction", NewF));
// Now handle aliases to this function, and clone those as well.
- if (!FuncToAliasMap.count(&F))
- continue;
- for (auto *A : FuncToAliasMap[&F]) {
- std::string Name = getMemProfFuncName(A->getName(), I);
- auto *PrevA = M.getNamedAlias(Name);
- auto *NewA = GlobalAlias::create(A->getValueType(),
- A->getType()->getPointerAddressSpace(),
- A->getLinkage(), Name, NewF);
- NewA->copyAttributesFrom(A);
- if (PrevA) {
- // We might have created this when adjusting callsite in another
- // function. It should be a declaration.
- assert(PrevA->isDeclaration());
- NewA->takeName(PrevA);
- PrevA->replaceAllUsesWith(NewA);
- PrevA->eraseFromParent();
- }
- }
+ CloneFuncAliases(NewF, I);
}
return VMaps;
}
@@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
bool ClonesCreated = false;
unsigned NumClonesCreated = 0;
- auto CloneFuncIfNeeded = [&](unsigned NumClones) {
+ auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
// We should at least have version 0 which is the original copy.
assert(NumClones > 0);
// If only one copy needed use original.
@@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
assert(NumClonesCreated == NumClones);
return;
}
- VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
+ VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS);
// The first "clone" is the original copy, which doesn't have a VMap.
assert(VMaps.size() == NumClones - 1);
Changed = true;
@@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
};
auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
- Function *CalledFunction) {
+ Function *CalledFunction, FunctionSummary *FS) {
// Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
+ CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS);
assert(!isMemProfClone(*CalledFunction));
@@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// below.
auto CalleeOrigName = CalledFunction->getName();
for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Do nothing if this version calls the original version of its
// callee.
if (!StackNode.Clones[J])
@@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
#endif
// Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
+ CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS);
OrigAllocsThinBackend++;
AllocVersionsThinBackend += AllocNode.Versions.size();
@@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// Update the allocation types per the summary info.
for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
+ // If the VMap is empty, this clone was a duplicate of another and
+ // was created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Ignore any that didn't get an assigned allocation type.
if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
continue;
@@ -5670,7 +5768,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// we don't need to do ICP, but might need to clone this
// function as it is the target of other cloned calls.
if (NumClones)
- CloneFuncIfNeeded(NumClones);
+ CloneFuncIfNeeded(NumClones, FS);
}
else {
@@ -5690,7 +5788,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
}
#endif
- CloneCallsite(StackNode, CB, CalledFunction);
+ CloneCallsite(StackNode, CB, CalledFunction, FS);
}
} else if (CB->isTailCall() && CalledFunction) {
// Locate the synthesized callsite info for the callee VI, if any was
@@ -5700,7 +5798,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
assert(Callsite != MapTailCallCalleeVIToCallsite.end());
- CloneCallsite(Callsite->second, CB, CalledFunction);
+ CloneCallsite(Callsite->second, CB, CalledFunction, FS);
}
}
}
@@ -5846,6 +5944,10 @@ void MemProfContextDisambiguation::performICP(
// check.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
@@ -5891,6 +5993,10 @@ void MemProfContextDisambiguation::performICP(
// TotalCount and the number promoted.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cf6d0ec..e1e24a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -318,18 +318,18 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
// * Single constant active lane -> store
// * Narrow width by halfs excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
+ Value *StorePtr = II.getArgOperand(1);
+ Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
- if (ConstMask->isAllOnesValue()) {
- Value *StorePtr = II.getArgOperand(1);
- Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+ if (maskIsAllOneOrUndef(ConstMask)) {
StoreInst *S =
new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
S->copyMetadata(II);
@@ -389,7 +389,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
return nullptr;
// If the mask is all zeros, a scatter does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// Vector splat address -> scalar store
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 87000a1..3df448d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -50,6 +50,9 @@
using namespace llvm;
using namespace PatternMatch;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
/// Replace a select operand based on an equality comparison with the identity
/// constant of a binop.
@@ -4492,8 +4495,21 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A,
Value *B) -> Instruction * {
if (Value *V = simplifySelectInst(B, TrueVal, FalseVal,
- SQ.getWithInstruction(&SI)))
- return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V);
+ SQ.getWithInstruction(&SI))) {
+ Value *NewTrueVal = IsAnd ? V : TrueVal;
+ Value *NewFalseVal = IsAnd ? FalseVal : V;
+
+ // If the True and False values don't change, then preserve the branch
+ // metadata of the original select as the net effect of this change is to
+ // simplify the conditional.
+ Instruction *MDFrom = nullptr;
+ if (NewTrueVal == TrueVal && NewFalseVal == FalseVal &&
+ !ProfcheckDisableMetadataFixes) {
+ MDFrom = &SI;
+ }
+ return SelectInst::Create(A, NewTrueVal, NewFalseVal, "", nullptr,
+ MDFrom);
+ }
// Is (select B, T, F) a SPF?
if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) {
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 9d4fb79..d6b7633 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1646,10 +1646,6 @@ NewGVN::performSymbolicPredicateInfoEvaluation(BitCastInst *I) const {
// Evaluate read only and pure calls, and create an expression result.
NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
auto *CI = cast<CallInst>(I);
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- if (auto *ReturnedValue = II->getReturnedArgOperand())
- return ExprResult::some(createVariableOrConstant(ReturnedValue));
- }
// FIXME: Currently the calls which may access the thread id may
// be considered as not accessing the memory. But this is
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 43d61f2..a88cffc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3298,10 +3298,11 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
+ bool UsedByLoadStoreAddress = isUsedByLoadStoreAddress(this);
InstructionCost ScalarCost =
ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
- PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
+ PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
+ nullptr, Ctx.CostKind);
if (isSingleScalar())
return ScalarCost;
@@ -3312,7 +3313,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// vectorized addressing or the loaded value is used as part of an address
// of another load or store.
bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
- if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
+ if (PreferVectorizedAddressing || !UsedByLoadStoreAddress) {
bool EfficientVectorLoadStore =
Ctx.TTI.supportsEfficientVectorElementLoadStore();
if (!(IsLoad && !PreferVectorizedAddressing) &&