Diffstat (limited to 'llvm/lib')
 llvm/lib/Analysis/HashRecognize.cpp | 2
 llvm/lib/Analysis/LoopAccessAnalysis.cpp | 32
 llvm/lib/Analysis/ValueTracking.cpp | 26
 llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 3
 llvm/lib/CodeGen/MachineRegisterInfo.cpp | 2
 llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2
 llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 9
 llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 3
 llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp | 30
 llvm/lib/IR/Assumptions.cpp | 12
 llvm/lib/IR/DebugInfoMetadata.cpp | 3
 llvm/lib/IR/DiagnosticHandler.cpp | 2
 llvm/lib/IR/ModuleSummaryIndex.cpp | 2
 llvm/lib/IR/PassInstrumentation.cpp | 7
 llvm/lib/IR/ProfDataUtils.cpp | 99
 llvm/lib/IR/SafepointIRVerifier.cpp | 2
 llvm/lib/IR/VFABIDemangler.cpp | 4
 llvm/lib/IR/Value.cpp | 2
 llvm/lib/Object/BuildID.cpp | 26
 llvm/lib/TableGen/Error.cpp | 58
 llvm/lib/TableGen/Main.cpp | 4
 llvm/lib/TableGen/Record.cpp | 6
 llvm/lib/TableGen/TGParser.cpp | 6
 llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 2
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 30
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 121
 llvm/lib/Target/AMDGPU/AMDGPU.td | 2
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 20
 llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 5
 llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 5
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 3
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 3
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 25
 llvm/lib/Target/AMDGPU/SIISelLowering.h | 5
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 11
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 25
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 16
 llvm/lib/Target/ARM/ARMISelLowering.h | 10
 llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 9
 llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 3
 llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 2
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 2
 llvm/lib/Target/RISCV/RISCVGISel.td | 10
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 6
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4
 llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp | 4
 llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 16
 llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 4
 llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 4
 llvm/lib/Target/X86/X86ISelLowering.cpp | 21
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10
 llvm/lib/Transforms/Scalar/GVN.cpp | 33
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 10
 llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 2
 57 files changed, 455 insertions(+), 324 deletions(-)
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index 5d7ee1f..4529123 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -97,7 +97,7 @@ static bool containsUnreachable(const Loop &L,
}
}
}
- return std::distance(Latch->begin(), Latch->end()) != Visited.size();
+ return Latch->size() != Visited.size();
}
/// A structure that can hold either a Simple Recurrence or a Conditional
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 47dccde..7adb25d 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -233,19 +233,25 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
 // Check if we have a suitable dereferenceable assumption we can use.
- if (!StartPtrV->canBeFreed()) {
- Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
- if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
- if (isa<BranchInst>(LoopPred->getTerminator()))
- CtxI = LoopPred->getTerminator();
- }
-
- RetainedKnowledge DerefRK = getKnowledgeValidInContext(
- StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
- if (DerefRK) {
- DerefBytesSCEV =
- SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue));
- }
+ Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
+ if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
+ if (isa<BranchInst>(LoopPred->getTerminator()))
+ CtxI = LoopPred->getTerminator();
+ }
+ RetainedKnowledge DerefRK;
+ getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC,
+ [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+ if (!isValidAssumeForContext(Assume, CtxI, DT))
+ return false;
+ if (StartPtrV->canBeFreed() &&
+ !willNotFreeBetween(Assume, CtxI))
+ return false;
+ DerefRK = std::max(DerefRK, RK);
+ return true;
+ });
+ if (DerefRK) {
+ DerefBytesSCEV =
+ SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue));
}
if (DerefBytesSCEV->isZero())
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 09a8fbe..1eda7a7 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -89,6 +89,9 @@ using namespace llvm::PatternMatch;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+/// Maximum number of instructions to check between assume and context
+/// instruction.
+static constexpr unsigned MaxInstrsToCheckForFree = 16;
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
@@ -561,6 +564,29 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
return false;
}
+bool llvm::willNotFreeBetween(const Instruction *Assume,
+ const Instruction *CtxI) {
+ if (CtxI->getParent() != Assume->getParent() || !Assume->comesBefore(CtxI))
+ return false;
+ // Make sure the current function cannot arrange for another thread to free on
+ // its behalf.
+ if (!CtxI->getFunction()->hasNoSync())
+ return false;
+
+ // Check if there are any calls between the assume and CtxI that may
+ // free memory.
+ for (const auto &[Idx, I] :
+ enumerate(make_range(Assume->getIterator(), CtxI->getIterator()))) {
+ // Limit number of instructions to walk.
+ if (Idx > MaxInstrsToCheckForFree)
+ return false;
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ if (!CB->hasFnAttr(Attribute::NoFree))
+ return false;
+ }
+ return true;
+}
+
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
// we still have enough information about `RHS` to conclude non-zero. For
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
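Note: the LoopAccessAnalysis hunk above is the intended consumer of this new helper. Instead of refusing dereferenceable assumptions whenever the pointer can be freed, it now accepts them when willNotFreeBetween() proves that no call between the assume and the context instruction may free the memory. A minimal sketch of that calling pattern, extracted for clarity (bestDerefKnowledge is an illustrative name, not an LLVM API):

    #include "llvm/Analysis/AssumeBundleQueries.h"
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Dominators.h"
    #include <algorithm>
    using namespace llvm;

    // Return the strongest dereferenceable knowledge for Ptr that provably
    // still holds at CtxI: the assume must be valid for the context, and if
    // Ptr can be freed, willNotFreeBetween must show nothing frees it between
    // the assume and CtxI.
    static RetainedKnowledge bestDerefKnowledge(Value *Ptr, AssumptionCache &AC,
                                                Instruction *CtxI,
                                                DominatorTree *DT) {
      RetainedKnowledge Best;
      getKnowledgeForValue(Ptr, {Attribute::Dereferenceable}, AC,
                           [&](RetainedKnowledge RK, Instruction *Assume, auto) {
                             if (!isValidAssumeForContext(Assume, CtxI, DT))
                               return false;
                             if (Ptr->canBeFreed() &&
                                 !willNotFreeBetween(Assume, CtxI))
                               return false;
                             Best = std::max(Best, RK);
                             return true;
                           });
      return Best;
    }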
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 1703b27..bc0bb34 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -618,12 +618,15 @@ bool DwarfExpression::addExpression(
case dwarf::DW_OP_dup:
case dwarf::DW_OP_push_object_address:
case dwarf::DW_OP_over:
+ case dwarf::DW_OP_rot:
case dwarf::DW_OP_eq:
case dwarf::DW_OP_ne:
case dwarf::DW_OP_gt:
case dwarf::DW_OP_ge:
case dwarf::DW_OP_lt:
case dwarf::DW_OP_le:
+ case dwarf::DW_OP_neg:
+ case dwarf::DW_OP_abs:
emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index abb3f3e..ae284f3 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -83,8 +83,6 @@ constrainRegClass(MachineRegisterInfo &MRI, Register Reg,
const TargetRegisterClass *MachineRegisterInfo::constrainRegClass(
Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) {
- if (Reg.isPhysical())
- return nullptr;
return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs);
}
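Note: with the physical-register early-out gone, constrainRegClass() now expects a virtual register (getRegClass() is only defined for vregs). Callers that may see a physical register must guard themselves, as the SIFoldOperands hunk below does with New->getReg().isVirtual().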
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 11bc64c..bb10cf6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -160,7 +160,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
// If all uses are reading from the src physical register and copying the
// register is either impossible or very expensive, then don't create a copy.
- if (MatchReg && SrcRC->getCopyCost() < 0) {
+ if (MatchReg && SrcRC->expensiveOrImpossibleToCopy()) {
VRBase = SrcReg;
} else {
// Create the reg, emit the copy.
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 31e7855..4f4fb9c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -111,15 +111,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
- const TargetLowering &TLI,
MCRegister &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return;
Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost))
- return;
-
if (Reg.isVirtual())
return;
@@ -136,7 +132,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
if (PhysReg) {
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
- Cost = RC->getCopyCost();
+ Cost = RC->expensiveOrImpossibleToCopy() ? -1 : RC->getCopyCost();
}
}
@@ -490,8 +486,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
MCRegister PhysReg;
int Cost = 1;
// Determine if this is a physical register dependency.
- const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost);
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
assert((!PhysReg || !isChain) && "Chain dependence via physreg data?");
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
// emits a copy from the physical register to a virtual register unless
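Note: the new TargetRegisterClass::expensiveOrImpossibleToCopy() predicate replaces the open-coded getCopyCost() < 0 test; a negative copy cost is the existing sentinel for "expensive or impossible to copy". A sketch of the presumed helper, inferred from the condition it replaces rather than quoted from this patch:

    // Presumed shape, in llvm/include/llvm/CodeGen/TargetRegisterInfo.h:
    bool expensiveOrImpossibleToCopy() const { return getCopyCost() < 0; }

CheckForPhysRegDependency above then folds that case back to Cost = -1, so downstream scheduler checks keep seeing the sentinel value.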
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index 23b72da..6e316f1 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -280,6 +280,9 @@ std::vector<Block *> LinkGraph::splitBlockImpl(std::vector<Block *> Blocks,
void LinkGraph::dump(raw_ostream &OS) {
DenseMap<Block *, std::vector<Symbol *>> BlockSymbols;
+ OS << "LinkGraph \"" << getName()
+ << "\" (triple = " << getTargetTriple().str() << ")\n";
+
// Map from blocks to the symbols pointing at them.
for (auto *Sym : defined_symbols())
BlockSymbols[&Sym->getBlock()].push_back(Sym);
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index 584b9f0..17050b0 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -21,23 +21,21 @@ JITLinkerBase::~JITLinkerBase() = default;
void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
- LLVM_DEBUG({
- dbgs() << "Starting link phase 1 for graph " << G->getName() << "\n";
- });
+ LLVM_DEBUG(dbgs() << "Starting link phase 1\n");
// Prune and optimize the graph.
if (auto Err = runPasses(Passes.PrePrunePasses))
return Ctx->notifyFailed(std::move(Err));
LLVM_DEBUG({
- dbgs() << "Link graph \"" << G->getName() << "\" pre-pruning:\n";
+ dbgs() << "Link graph pre-pruning:\n";
G->dump(dbgs());
});
prune(*G);
LLVM_DEBUG({
- dbgs() << "Link graph \"" << G->getName() << "\" post-pruning:\n";
+ dbgs() << "Link graph post-pruning:\n";
G->dump(dbgs());
});
@@ -67,14 +65,15 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
AllocResult AR) {
+ LLVM_DEBUG(dbgs() << "Starting link phase 2\n");
+
if (AR)
Alloc = std::move(*AR);
else
return Ctx->notifyFailed(AR.takeError());
LLVM_DEBUG({
- dbgs() << "Link graph \"" << G->getName()
- << "\" before post-allocation passes:\n";
+ dbgs() << "Link graph before post-allocation passes:\n";
G->dump(dbgs());
});
@@ -131,9 +130,7 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self,
Expected<AsyncLookupResult> LR) {
- LLVM_DEBUG({
- dbgs() << "Starting link phase 3 for graph " << G->getName() << "\n";
- });
+ LLVM_DEBUG(dbgs() << "Starting link phase 3\n");
// If the lookup failed, bail out.
if (!LR)
@@ -143,8 +140,7 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self,
applyLookupResult(*LR);
LLVM_DEBUG({
- dbgs() << "Link graph \"" << G->getName()
- << "\" before pre-fixup passes:\n";
+ dbgs() << "Link graph before pre-fixup passes:\n";
G->dump(dbgs());
});
@@ -152,7 +148,7 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self,
return abandonAllocAndBailOut(std::move(Self), std::move(Err));
LLVM_DEBUG({
- dbgs() << "Link graph \"" << G->getName() << "\" before copy-and-fixup:\n";
+ dbgs() << "Link graph before copy-and-fixup:\n";
G->dump(dbgs());
});
@@ -161,7 +157,7 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self,
return abandonAllocAndBailOut(std::move(Self), std::move(Err));
LLVM_DEBUG({
- dbgs() << "Link graph \"" << G->getName() << "\" after copy-and-fixup:\n";
+ dbgs() << "Link graph after copy-and-fixup:\n";
G->dump(dbgs());
});
@@ -186,16 +182,14 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self,
void JITLinkerBase::linkPhase4(std::unique_ptr<JITLinkerBase> Self,
FinalizeResult FR) {
- LLVM_DEBUG({
- dbgs() << "Starting link phase 4 for graph " << G->getName() << "\n";
- });
+ LLVM_DEBUG(dbgs() << "Starting link phase 4\n");
if (!FR)
return Ctx->notifyFailed(FR.takeError());
Ctx->notifyFinalized(std::move(*FR));
- LLVM_DEBUG({ dbgs() << "Link of graph " << G->getName() << " complete\n"; });
+ LLVM_DEBUG({ dbgs() << "Link complete\n"; });
}
Error JITLinkerBase::runPasses(LinkGraphPassList &Passes) {
diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp
index f8bbcb3..3397f0e 100644
--- a/llvm/lib/IR/Assumptions.cpp
+++ b/llvm/lib/IR/Assumptions.cpp
@@ -20,9 +20,8 @@
using namespace llvm;
-namespace {
-bool hasAssumption(const Attribute &A,
- const KnownAssumptionString &AssumptionStr) {
+static bool hasAssumption(const Attribute &A,
+ const KnownAssumptionString &AssumptionStr) {
if (!A.isValid())
return false;
assert(A.isStringAttribute() && "Expected a string attribute!");
@@ -33,7 +32,7 @@ bool hasAssumption(const Attribute &A,
return llvm::is_contained(Strings, AssumptionStr);
}
-DenseSet<StringRef> getAssumptions(const Attribute &A) {
+static DenseSet<StringRef> getAssumptions(const Attribute &A) {
if (!A.isValid())
return DenseSet<StringRef>();
assert(A.isStringAttribute() && "Expected a string attribute!");
@@ -47,8 +46,8 @@ DenseSet<StringRef> getAssumptions(const Attribute &A) {
}
template <typename AttrSite>
-bool addAssumptionsImpl(AttrSite &Site,
- const DenseSet<StringRef> &Assumptions) {
+static bool addAssumptionsImpl(AttrSite &Site,
+ const DenseSet<StringRef> &Assumptions) {
if (Assumptions.empty())
return false;
@@ -64,7 +63,6 @@ bool addAssumptionsImpl(AttrSite &Site,
return true;
}
-} // namespace
bool llvm::hasAssumption(const Function &F,
const KnownAssumptionString &AssumptionStr) {
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 1ededb9e7..77d044b 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1768,6 +1768,7 @@ bool DIExpression::isValid() const {
case dwarf::DW_OP_bregx:
case dwarf::DW_OP_push_object_address:
case dwarf::DW_OP_over:
+ case dwarf::DW_OP_rot:
case dwarf::DW_OP_consts:
case dwarf::DW_OP_eq:
case dwarf::DW_OP_ne:
@@ -1775,6 +1776,8 @@ bool DIExpression::isValid() const {
case dwarf::DW_OP_ge:
case dwarf::DW_OP_lt:
case dwarf::DW_OP_le:
+ case dwarf::DW_OP_neg:
+ case dwarf::DW_OP_abs:
break;
}
}
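Note: with DW_OP_rot, DW_OP_neg, and DW_OP_abs accepted by both DIExpression::isValid() and the DWARF emitter above, such expressions can now be built and survive verification. A minimal sketch (assumes an existing LLVMContext; illustrative only):

    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    #include <cassert>
    using namespace llvm;

    // Build !DIExpression(DW_OP_neg), newly considered valid by this change.
    DIExpression *makeNegatedLocation(LLVMContext &Ctx) {
      DIExpression *E = DIExpression::get(Ctx, {dwarf::DW_OP_neg});
      assert(E->isValid() && "DW_OP_neg is accepted after this change");
      return E;
    }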
diff --git a/llvm/lib/IR/DiagnosticHandler.cpp b/llvm/lib/IR/DiagnosticHandler.cpp
index 683eade..eb2fe3b 100644
--- a/llvm/lib/IR/DiagnosticHandler.cpp
+++ b/llvm/lib/IR/DiagnosticHandler.cpp
@@ -36,6 +36,7 @@ struct PassRemarksOpt {
}
}
};
+} // namespace
static PassRemarksOpt PassRemarksPassedOptLoc;
static PassRemarksOpt PassRemarksMissedOptLoc;
@@ -66,7 +67,6 @@ static cl::opt<PassRemarksOpt, true, cl::parser<std::string>>
"Enable optimization analysis remarks from passes whose name match "
"the given regular expression"),
cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired);
-}
bool DiagnosticHandler::isAnalysisRemarkEnabled(StringRef PassName) const {
return (PassRemarksAnalysisOptLoc.Pattern &&
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index d9024b0..dc55b63 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -409,7 +409,7 @@ struct Edge {
GlobalValue::GUID Src;
GlobalValue::GUID Dst;
};
-}
+} // namespace
void Attributes::add(const Twine &Name, const Twine &Value,
const Twine &Comment) {
diff --git a/llvm/lib/IR/PassInstrumentation.cpp b/llvm/lib/IR/PassInstrumentation.cpp
index 70bbe8f..52aad8f 100644
--- a/llvm/lib/IR/PassInstrumentation.cpp
+++ b/llvm/lib/IR/PassInstrumentation.cpp
@@ -15,7 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/PassManager.h"
-namespace llvm {
+using namespace llvm;
template struct LLVM_EXPORT_TEMPLATE Any::TypeId<const Module *>;
template struct LLVM_EXPORT_TEMPLATE Any::TypeId<const Function *>;
@@ -42,7 +42,8 @@ PassInstrumentationCallbacks::getPassNameForClassName(StringRef ClassName) {
AnalysisKey PassInstrumentationAnalysis::Key;
-bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) {
+bool llvm::isSpecialPass(StringRef PassID,
+ const std::vector<StringRef> &Specials) {
size_t Pos = PassID.find('<');
StringRef Prefix = PassID;
if (Pos != StringRef::npos)
@@ -50,5 +51,3 @@ bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) {
return any_of(Specials,
[Prefix](StringRef S) { return Prefix.ends_with(S); });
}
-
-} // namespace llvm
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index edeca97..fc2be51 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -24,8 +24,6 @@
using namespace llvm;
-namespace {
-
// MD_prof nodes have the following layout
//
// In general:
@@ -41,14 +39,15 @@ namespace {
// correctly, and can change the behavior in the future if the layout changes
// the minimum number of operands for MD_prof nodes with branch weights
-constexpr unsigned MinBWOps = 3;
+static constexpr unsigned MinBWOps = 3;
// the minimum number of operands for MD_prof nodes with value profiles
-constexpr unsigned MinVPOps = 5;
+static constexpr unsigned MinVPOps = 5;
// We may want to add support for other MD_prof types, so provide an abstraction
// for checking the metadata type.
-bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) {
+static bool isTargetMD(const MDNode *ProfData, const char *Name,
+ unsigned MinOps) {
// TODO: This routine may be simplified if MD_prof used an enum instead of a
// string to differentiate the types of MD_prof nodes.
if (!ProfData || !Name || MinOps < 2)
@@ -101,14 +100,11 @@ static SmallVector<uint32_t> fitWeights(ArrayRef<uint64_t> Weights) {
return Ret;
}
-} // namespace
-
-namespace llvm {
-cl::opt<bool> ElideAllZeroBranchWeights("elide-all-zero-branch-weights",
+static cl::opt<bool> ElideAllZeroBranchWeights("elide-all-zero-branch-weights",
#if defined(LLVM_ENABLE_PROFCHECK)
- cl::init(false)
+ cl::init(false)
#else
- cl::init(true)
+ cl::init(true)
#endif
);
const char *MDProfLabels::BranchWeights = "branch_weights";
@@ -118,21 +114,21 @@ const char *MDProfLabels::FunctionEntryCount = "function_entry_count";
const char *MDProfLabels::SyntheticFunctionEntryCount =
"synthetic_function_entry_count";
const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown";
-const char *LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count";
+const char *llvm::LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count";
-bool hasProfMD(const Instruction &I) {
+bool llvm::hasProfMD(const Instruction &I) {
return I.hasMetadata(LLVMContext::MD_prof);
}
-bool isBranchWeightMD(const MDNode *ProfileData) {
+bool llvm::isBranchWeightMD(const MDNode *ProfileData) {
return isTargetMD(ProfileData, MDProfLabels::BranchWeights, MinBWOps);
}
-bool isValueProfileMD(const MDNode *ProfileData) {
+bool llvm::isValueProfileMD(const MDNode *ProfileData) {
return isTargetMD(ProfileData, MDProfLabels::ValueProfile, MinVPOps);
}
-bool hasBranchWeightMD(const Instruction &I) {
+bool llvm::hasBranchWeightMD(const Instruction &I) {
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
return isBranchWeightMD(ProfileData);
}
@@ -147,16 +143,16 @@ static bool hasCountTypeMD(const Instruction &I) {
return isa<CallBase>(I) && !isBranchWeightMD(ProfileData);
}
-bool hasValidBranchWeightMD(const Instruction &I) {
+bool llvm::hasValidBranchWeightMD(const Instruction &I) {
return getValidBranchWeightMDNode(I);
}
-bool hasBranchWeightOrigin(const Instruction &I) {
+bool llvm::hasBranchWeightOrigin(const Instruction &I) {
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
return hasBranchWeightOrigin(ProfileData);
}
-bool hasBranchWeightOrigin(const MDNode *ProfileData) {
+bool llvm::hasBranchWeightOrigin(const MDNode *ProfileData) {
if (!isBranchWeightMD(ProfileData))
return false;
auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(1));
@@ -168,54 +164,54 @@ bool hasBranchWeightOrigin(const MDNode *ProfileData) {
return ProfDataName != nullptr;
}
-unsigned getBranchWeightOffset(const MDNode *ProfileData) {
+unsigned llvm::getBranchWeightOffset(const MDNode *ProfileData) {
return hasBranchWeightOrigin(ProfileData) ? 2 : 1;
}
-unsigned getNumBranchWeights(const MDNode &ProfileData) {
+unsigned llvm::getNumBranchWeights(const MDNode &ProfileData) {
return ProfileData.getNumOperands() - getBranchWeightOffset(&ProfileData);
}
-MDNode *getBranchWeightMDNode(const Instruction &I) {
+MDNode *llvm::getBranchWeightMDNode(const Instruction &I) {
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
if (!isBranchWeightMD(ProfileData))
return nullptr;
return ProfileData;
}
-MDNode *getValidBranchWeightMDNode(const Instruction &I) {
+MDNode *llvm::getValidBranchWeightMDNode(const Instruction &I) {
auto *ProfileData = getBranchWeightMDNode(I);
if (ProfileData && getNumBranchWeights(*ProfileData) == I.getNumSuccessors())
return ProfileData;
return nullptr;
}
-void extractFromBranchWeightMD32(const MDNode *ProfileData,
- SmallVectorImpl<uint32_t> &Weights) {
+void llvm::extractFromBranchWeightMD32(const MDNode *ProfileData,
+ SmallVectorImpl<uint32_t> &Weights) {
extractFromBranchWeightMD(ProfileData, Weights);
}
-void extractFromBranchWeightMD64(const MDNode *ProfileData,
- SmallVectorImpl<uint64_t> &Weights) {
+void llvm::extractFromBranchWeightMD64(const MDNode *ProfileData,
+ SmallVectorImpl<uint64_t> &Weights) {
extractFromBranchWeightMD(ProfileData, Weights);
}
-bool extractBranchWeights(const MDNode *ProfileData,
- SmallVectorImpl<uint32_t> &Weights) {
+bool llvm::extractBranchWeights(const MDNode *ProfileData,
+ SmallVectorImpl<uint32_t> &Weights) {
if (!isBranchWeightMD(ProfileData))
return false;
extractFromBranchWeightMD(ProfileData, Weights);
return true;
}
-bool extractBranchWeights(const Instruction &I,
- SmallVectorImpl<uint32_t> &Weights) {
+bool llvm::extractBranchWeights(const Instruction &I,
+ SmallVectorImpl<uint32_t> &Weights) {
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
return extractBranchWeights(ProfileData, Weights);
}
-bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal,
- uint64_t &FalseVal) {
+bool llvm::extractBranchWeights(const Instruction &I, uint64_t &TrueVal,
+ uint64_t &FalseVal) {
assert((I.getOpcode() == Instruction::Br ||
I.getOpcode() == Instruction::Select) &&
"Looking for branch weights on something besides branch, select, or "
@@ -234,7 +230,8 @@ bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal,
return true;
}
-bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) {
+bool llvm::extractProfTotalWeight(const MDNode *ProfileData,
+ uint64_t &TotalVal) {
TotalVal = 0;
if (!ProfileData)
return false;
@@ -262,11 +259,12 @@ bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) {
return false;
}
-bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) {
+bool llvm::extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) {
return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal);
}
-void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) {
+void llvm::setExplicitlyUnknownBranchWeights(Instruction &I,
+ StringRef PassName) {
MDBuilder MDB(I.getContext());
I.setMetadata(
LLVMContext::MD_prof,
@@ -275,14 +273,16 @@ void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) {
MDB.createString(PassName)}));
}
-void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F,
- StringRef PassName) {
+void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I,
+ Function &F,
+ StringRef PassName) {
if (std::optional<Function::ProfileCount> EC = F.getEntryCount();
EC && EC->getCount() > 0)
setExplicitlyUnknownBranchWeights(I, PassName);
}
-void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) {
+void llvm::setExplicitlyUnknownFunctionEntryCount(Function &F,
+ StringRef PassName) {
MDBuilder MDB(F.getContext());
F.setMetadata(
LLVMContext::MD_prof,
@@ -291,21 +291,21 @@ void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) {
MDB.createString(PassName)}));
}
-bool isExplicitlyUnknownProfileMetadata(const MDNode &MD) {
+bool llvm::isExplicitlyUnknownProfileMetadata(const MDNode &MD) {
if (MD.getNumOperands() != 2)
return false;
return MD.getOperand(0).equalsStr(MDProfLabels::UnknownBranchWeightsMarker);
}
-bool hasExplicitlyUnknownBranchWeights(const Instruction &I) {
+bool llvm::hasExplicitlyUnknownBranchWeights(const Instruction &I) {
auto *MD = I.getMetadata(LLVMContext::MD_prof);
if (!MD)
return false;
return isExplicitlyUnknownProfileMetadata(*MD);
}
-void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
- bool IsExpected, bool ElideAllZero) {
+void llvm::setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
+ bool IsExpected, bool ElideAllZero) {
if ((ElideAllZeroBranchWeights && ElideAllZero) &&
llvm::all_of(Weights, [](uint32_t V) { return V == 0; })) {
I.setMetadata(LLVMContext::MD_prof, nullptr);
@@ -317,13 +317,14 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
I.setMetadata(LLVMContext::MD_prof, BranchWeights);
}
-void setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights,
- bool IsExpected, bool ElideAllZero) {
+void llvm::setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights,
+ bool IsExpected, bool ElideAllZero) {
setBranchWeights(I, fitWeights(Weights), IsExpected, ElideAllZero);
}
-SmallVector<uint32_t> downscaleWeights(ArrayRef<uint64_t> Weights,
- std::optional<uint64_t> KnownMaxCount) {
+SmallVector<uint32_t>
+llvm::downscaleWeights(ArrayRef<uint64_t> Weights,
+ std::optional<uint64_t> KnownMaxCount) {
uint64_t MaxCount = KnownMaxCount.has_value() ? KnownMaxCount.value()
: *llvm::max_element(Weights);
assert(MaxCount > 0 && "Bad max count");
@@ -334,7 +335,7 @@ SmallVector<uint32_t> downscaleWeights(ArrayRef<uint64_t> Weights,
return DownscaledWeights;
}
-void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
+void llvm::scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
assert(T != 0 && "Caller should guarantee");
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
if (ProfileData == nullptr)
@@ -387,5 +388,3 @@ void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
}
I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals));
}
-
-} // namespace llvm
diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp
index e54894c..e35b5b3 100644
--- a/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -196,7 +196,6 @@ protected:
static void Verify(const Function &F, const DominatorTree &DT,
const CFGDeadness &CD);
-namespace llvm {
PreservedAnalyses SafepointIRVerifierPass::run(Function &F,
FunctionAnalysisManager &AM) {
const auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
@@ -205,7 +204,6 @@ PreservedAnalyses SafepointIRVerifierPass::run(Function &F,
Verify(F, DT, CD);
return PreservedAnalyses::all();
}
-} // namespace llvm
namespace {
diff --git a/llvm/lib/IR/VFABIDemangler.cpp b/llvm/lib/IR/VFABIDemangler.cpp
index 2de05a5..4fcf436 100644
--- a/llvm/lib/IR/VFABIDemangler.cpp
+++ b/llvm/lib/IR/VFABIDemangler.cpp
@@ -20,15 +20,16 @@ using namespace llvm;
#define DEBUG_TYPE "vfabi-demangler"
-namespace {
/// Utilities for the Vector Function ABI name parser.
+namespace {
/// Return types for the parser functions.
enum class ParseRet {
OK, // Found.
None, // Not found.
Error // Syntax error.
};
+} // namespace
/// Extracts the `<isa>` information from the mangled string, and
/// sets the `ISA` accordingly. If successful, the <isa> token is removed
@@ -372,7 +373,6 @@ getScalableECFromSignature(const FunctionType *Signature, const VFISAKind ISA,
return std::nullopt;
}
-} // namespace
// Format of the ABI name:
// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index a347609..b775cbb 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -622,6 +622,7 @@ enum PointerStripKind {
PSK_InBoundsConstantIndices,
PSK_InBounds
};
+} // end anonymous namespace
template <PointerStripKind StripKind> static void NoopCallback(const Value *) {}
@@ -696,7 +697,6 @@ static const Value *stripPointerCastsAndOffsets(
return V;
}
-} // end anonymous namespace
const Value *Value::stripPointerCasts() const {
return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
diff --git a/llvm/lib/Object/BuildID.cpp b/llvm/lib/Object/BuildID.cpp
index 89d6bc3..d1ee597 100644
--- a/llvm/lib/Object/BuildID.cpp
+++ b/llvm/lib/Object/BuildID.cpp
@@ -24,6 +24,24 @@ using namespace llvm::object;
namespace {
template <typename ELFT> BuildIDRef getBuildID(const ELFFile<ELFT> &Obj) {
+ auto findBuildID = [&Obj](const auto &ShdrOrPhdr,
+ uint64_t Alignment) -> std::optional<BuildIDRef> {
+ Error Err = Error::success();
+ for (auto N : Obj.notes(ShdrOrPhdr, Err))
+ if (N.getType() == ELF::NT_GNU_BUILD_ID &&
+ N.getName() == ELF::ELF_NOTE_GNU)
+ return N.getDesc(Alignment);
+ consumeError(std::move(Err));
+ return std::nullopt;
+ };
+
+ auto Sections = cantFail(Obj.sections());
+ for (const auto &S : Sections) {
+ if (S.sh_type != ELF::SHT_NOTE)
+ continue;
+ if (std::optional<BuildIDRef> ShdrRes = findBuildID(S, S.sh_addralign))
+ return ShdrRes.value();
+ }
auto PhdrsOrErr = Obj.program_headers();
if (!PhdrsOrErr) {
consumeError(PhdrsOrErr.takeError());
@@ -32,12 +50,8 @@ template <typename ELFT> BuildIDRef getBuildID(const ELFFile<ELFT> &Obj) {
for (const auto &P : *PhdrsOrErr) {
if (P.p_type != ELF::PT_NOTE)
continue;
- Error Err = Error::success();
- for (auto N : Obj.notes(P, Err))
- if (N.getType() == ELF::NT_GNU_BUILD_ID &&
- N.getName() == ELF::ELF_NOTE_GNU)
- return N.getDesc(P.p_align);
- consumeError(std::move(Err));
+ if (std::optional<BuildIDRef> PhdrRes = findBuildID(P, P.p_align))
+ return PhdrRes.value();
}
return {};
}
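Note: after this change the lookup walks SHT_NOTE sections before falling back to PT_NOTE program headers, which among other things lets objects without program headers (such as relocatable files) report a build ID. A small caller sketch using the public entry point from llvm/Object/BuildID.h (error handling trimmed):

    #include "llvm/Object/BuildID.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    // Print an object's GNU build ID as lowercase hex (empty if none found).
    static void printBuildID(const ObjectFile &Obj) {
      BuildIDRef ID = getBuildID(&Obj);
      for (uint8_t Byte : ID)
        outs() << format("%02x", Byte);
      outs() << "\n";
    }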
diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp
index de0c4c9..3ba2c6c 100644
--- a/llvm/lib/TableGen/Error.cpp
+++ b/llvm/lib/TableGen/Error.cpp
@@ -19,10 +19,10 @@
#include "llvm/TableGen/Record.h"
#include <cstdlib>
-namespace llvm {
+using namespace llvm;
-SourceMgr SrcMgr;
-unsigned ErrorsPrinted = 0;
+SourceMgr llvm::SrcMgr;
+unsigned llvm::ErrorsPrinted = 0;
static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind,
const Twine &Msg) {
@@ -49,118 +49,118 @@ static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind,
// Functions to print notes.
-void PrintNote(const Twine &Msg) {
- WithColor::note() << Msg << "\n";
-}
+void llvm::PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; }
-void PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintMsg(WithColor::note());
}
-void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+void llvm::PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg);
}
// Functions to print fatal notes.
-void PrintFatalNote(const Twine &Msg) {
+void llvm::PrintFatalNote(const Twine &Msg) {
PrintNote(Msg);
fatal_exit();
}
-void PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+void llvm::PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintNote(NoteLoc, Msg);
fatal_exit();
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintFatalNote(const Record *Rec, const Twine &Msg) {
+void llvm::PrintFatalNote(const Record *Rec, const Twine &Msg) {
PrintNote(Rec->getLoc(), Msg);
fatal_exit();
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
PrintNote(RecVal->getLoc(), Msg);
fatal_exit();
}
// Functions to print warnings.
-void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; }
+void llvm::PrintWarning(const Twine &Msg) {
+ WithColor::warning() << Msg << "\n";
+}
-void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
+void llvm::PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
-void PrintWarning(const char *Loc, const Twine &Msg) {
+void llvm::PrintWarning(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg);
}
// Functions to print errors.
-void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
+void llvm::PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
-void PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintMsg(WithColor::error());
}
-void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+void llvm::PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
}
-void PrintError(const char *Loc, const Twine &Msg) {
+void llvm::PrintError(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintError(const Record *Rec, const Twine &Msg) {
+void llvm::PrintError(const Record *Rec, const Twine &Msg) {
PrintMessage(Rec->getLoc(), SourceMgr::DK_Error, Msg);
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintError(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintError(const RecordVal *RecVal, const Twine &Msg) {
PrintMessage(RecVal->getLoc(), SourceMgr::DK_Error, Msg);
}
// Functions to print fatal errors.
-void PrintFatalError(const Twine &Msg) {
+void llvm::PrintFatalError(const Twine &Msg) {
PrintError(Msg);
fatal_exit();
}
-void PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintError(PrintMsg);
fatal_exit();
}
-void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+void llvm::PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintError(ErrorLoc, Msg);
fatal_exit();
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintFatalError(const Record *Rec, const Twine &Msg) {
+void llvm::PrintFatalError(const Record *Rec, const Twine &Msg) {
PrintError(Rec->getLoc(), Msg);
fatal_exit();
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
PrintError(RecVal->getLoc(), Msg);
fatal_exit();
}
// Check an assertion: Obtain the condition value and be sure it is true.
// If not, print a nonfatal error along with the message.
-bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
+bool llvm::CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
auto *CondValue = dyn_cast_or_null<IntInit>(Condition->convertInitializerTo(
IntRecTy::get(Condition->getRecordKeeper())));
if (!CondValue) {
@@ -178,11 +178,9 @@ bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
}
// Dump a message to stderr.
-void dumpMessage(SMLoc Loc, const Init *Message) {
+void llvm::dumpMessage(SMLoc Loc, const Init *Message) {
if (auto *MessageInit = dyn_cast<StringInit>(Message))
PrintNote(Loc, MessageInit->getValue());
else
PrintError(Loc, "dump value is not of type string");
}
-
-} // end namespace llvm
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index f545706..42043f7 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -64,14 +64,12 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
static cl::opt<bool>
TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
-namespace llvm {
-cl::opt<bool> EmitLongStrLiterals(
+cl::opt<bool> llvm::EmitLongStrLiterals(
"long-string-literals",
cl::desc("when emitting large string tables, prefer string literals over "
"comma-separated char literals. This can be a readability and "
"compile-time performance win, but upsets some compilers"),
cl::Hidden, cl::init(true));
-} // end namespace llvm
static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
"no-warn-on-unused-template-args",
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 051a896..2ea3a24 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -46,8 +46,7 @@ using namespace llvm;
// Context
//===----------------------------------------------------------------------===//
-namespace llvm {
-namespace detail {
+namespace llvm::detail {
/// This class represents the internal implementation of the RecordKeeper.
/// It contains all of the contextual static state of the Record classes. It is
/// kept out-of-line to simplify dependencies, and also make it easier for
@@ -100,8 +99,7 @@ struct RecordKeeperImpl {
void dumpAllocationStats(raw_ostream &OS) const;
};
-} // namespace detail
-} // namespace llvm
+} // namespace llvm::detail
void detail::RecordKeeperImpl::dumpAllocationStats(raw_ostream &OS) const {
// Dump memory allocation related stats.
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index f928ded..3d31d8e 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -31,8 +31,6 @@ using namespace llvm;
// Support Code for the Semantic Actions.
//===----------------------------------------------------------------------===//
-namespace llvm {
-
RecordsEntry::RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {}
RecordsEntry::RecordsEntry(std::unique_ptr<ForeachLoop> Loop)
: Loop(std::move(Loop)) {}
@@ -41,6 +39,7 @@ RecordsEntry::RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion)
RecordsEntry::RecordsEntry(std::unique_ptr<Record::DumpInfo> Dump)
: Dump(std::move(Dump)) {}
+namespace llvm {
struct SubClassReference {
SMRange RefRange;
const Record *Rec = nullptr;
@@ -61,6 +60,7 @@ struct SubMultiClassReference {
bool isInvalid() const { return MC == nullptr; }
void dump() const;
};
+} // end namespace llvm
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
@@ -74,8 +74,6 @@ LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
}
#endif
-} // end namespace llvm
-
static bool checkBitsConcrete(Record &R, const RecordVal &RV) {
const auto *BV = cast<BitsInit>(RV.getValue());
for (unsigned i = 0, e = BV->getNumBits(); i != e; ++i) {
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 8d6eb91..4357264d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -282,7 +282,7 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
static cl::opt<bool>
SplitSVEObjects("aarch64-split-sve-objects",
cl::desc("Split allocation of ZPR & PPR objects"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
cl::opt<bool> EnableHomogeneousPrologEpilog(
"homogeneous-prolog-epilog", cl::Hidden,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 36c9cb6..bc6b931 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1010,6 +1010,36 @@ let Predicates = [HasSVE_or_SME] in {
defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>;
+
+ // mul x (splat -1) -> neg x
+ def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_B $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_H $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_S $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))),
+ (NEG_ZPmZ_D $Op2, $Op1, $Op2)>;
+
+ let AddedComplexity = 5 in {
+ def : Pat<(nxv16i8 (AArch64mul_p nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_B_UNDEF $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv8i16 (AArch64mul_p nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_H_UNDEF $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (AArch64mul_p nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))),
+ (NEG_ZPmZ_S_UNDEF $Op2, $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (AArch64mul_p nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))),
+ (NEG_ZPmZ_D_UNDEF $Op2, $Op1, $Op2)>;
+ }
+
+ def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, (nxv16i8 (splat_vector (i32 -1))), nxv16i8:$Op2)),
+ (NEG_ZPmZ_B (DUP_ZI_B -1, 0), $Op1, $Op2)>;
+ def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, (nxv8i16 (splat_vector (i32 -1))), nxv8i16:$Op2)),
+ (NEG_ZPmZ_H (DUP_ZI_H -1, 0), $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, (nxv4i32 (splat_vector (i32 -1))), nxv4i32:$Op2)),
+ (NEG_ZPmZ_S (DUP_ZI_S -1, 0), $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, (nxv2i64 (splat_vector (i64 -1))), nxv2i64:$Op2)),
+ (NEG_ZPmZ_D (DUP_ZI_D -1, 0), $Op1, $Op2)>;
} // End HasSVE_or_SME
// COMPACT - word and doubleword
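Note on the asymmetry in the patterns above: in the first two groups the non-constant operand $Op2 is both the negation source and the merge value of the destructive NEG_ZPmZ, so it can simply be repeated. In the commuted AArch64mul_m1 group the inactive lanes of a merging multiply keep the first multiplicand, here the splat of -1, so those patterns materialize that constant with DUP_ZI_* and use it as the merge operand instead.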
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 8c4b4f6..50a8754 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5632,75 +5632,94 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
TTI::TargetCostKind CostKind) const {
InstructionCost Invalid = InstructionCost::getInvalid();
- InstructionCost Cost(TTI::TCC_Basic);
if (CostKind != TTI::TCK_RecipThroughput)
return Invalid;
- // Sub opcodes currently only occur in chained cases.
- // Independent partial reduction subtractions are still costed as an add
+ if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
+ (!ST->isNeonAvailable() || !ST->hasDotProd()))
+ return Invalid;
+
if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
OpAExtend == TTI::PR_None)
return Invalid;
+ assert((BinOp || (OpBExtend == TTI::PR_None && !InputTypeB)) &&
+ (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
+ "Unexpected values for OpBExtend or InputTypeB");
+
// We only support multiply binary operations for now, and for muls we
// require the types being extended to be the same.
- // NOTE: For muls AArch64 supports lowering mixed extensions to a usdot but
- // only if the i8mm or sve/streaming features are available.
- if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB ||
- OpBExtend == TTI::PR_None ||
- (OpAExtend != OpBExtend && !ST->hasMatMulInt8() &&
- !ST->isSVEorStreamingSVEAvailable())))
+ if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
return Invalid;
- assert((BinOp || (OpBExtend == TTI::PR_None && !InputTypeB)) &&
- "Unexpected values for OpBExtend or InputTypeB");
- EVT InputEVT = EVT::getEVT(InputTypeA);
- EVT AccumEVT = EVT::getEVT(AccumType);
+ bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
+ if (IsUSDot && !ST->hasMatMulInt8())
+ return Invalid;
+
+ unsigned Ratio =
+ AccumType->getScalarSizeInBits() / InputTypeA->getScalarSizeInBits();
+ if (VF.getKnownMinValue() <= Ratio)
+ return Invalid;
+
+ VectorType *InputVectorType = VectorType::get(InputTypeA, VF);
+ VectorType *AccumVectorType =
+ VectorType::get(AccumType, VF.divideCoefficientBy(Ratio));
+ // We don't yet support all kinds of legalization.
+ auto TA = TLI->getTypeAction(AccumVectorType->getContext(),
+ EVT::getEVT(AccumVectorType));
+ switch (TA) {
+ default:
+ return Invalid;
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypeSplitVector:
+ break;
+ }
+
+ // Check what kind of type-legalisation happens.
+ std::pair<InstructionCost, MVT> AccumLT =
+ getTypeLegalizationCost(AccumVectorType);
+ std::pair<InstructionCost, MVT> InputLT =
+ getTypeLegalizationCost(InputVectorType);
- unsigned VFMinValue = VF.getKnownMinValue();
+ InstructionCost Cost = InputLT.first * TTI::TCC_Basic;
- if (VF.isScalable()) {
- if (!ST->isSVEorStreamingSVEAvailable())
- return Invalid;
+ // Prefer using full types by costing half-full input types as more expensive.
+ if (TypeSize::isKnownLT(InputVectorType->getPrimitiveSizeInBits(),
+ TypeSize::getScalable(128)))
+ // FIXME: This can be removed after the cost of the extends are folded into
+ // the dot-product expression in VPlan, after landing:
+ // https://github.com/llvm/llvm-project/pull/147302
+ Cost *= 2;
- // Don't accept a partial reduction if the scaled accumulator is vscale x 1,
- // since we can't lower that type.
- unsigned Scale =
- AccumEVT.getScalarSizeInBits() / InputEVT.getScalarSizeInBits();
- if (VFMinValue == Scale)
- return Invalid;
+ if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
+ // i16 -> i64 is natively supported for udot/sdot
+ if (AccumLT.second.getScalarType() == MVT::i64 &&
+ InputLT.second.getScalarType() == MVT::i16)
+ return Cost;
+ // i8 -> i64 is supported with an extra level of extends
+ if (AccumLT.second.getScalarType() == MVT::i64 &&
+ InputLT.second.getScalarType() == MVT::i8)
+ // FIXME: This cost should probably be a little higher, e.g. Cost + 2
+ // because it requires two extra extends on the inputs. But if we'd change
+ // that now, a regular reduction would be cheaper because the costs of
+ // the extends in the IR are still counted. This can be fixed
+ // after https://github.com/llvm/llvm-project/pull/147302 has landed.
+ return Cost;
}
- if (VF.isFixed() &&
- (!ST->isNeonAvailable() || !ST->hasDotProd() || AccumEVT == MVT::i64))
- return Invalid;
- if (InputEVT == MVT::i8) {
- switch (VFMinValue) {
- default:
- return Invalid;
- case 8:
- if (AccumEVT == MVT::i32)
- Cost *= 2;
- else if (AccumEVT != MVT::i64)
- return Invalid;
- break;
- case 16:
- if (AccumEVT == MVT::i64)
- Cost *= 2;
- else if (AccumEVT != MVT::i32)
- return Invalid;
- break;
- }
- } else if (InputEVT == MVT::i16) {
- // FIXME: Allow i32 accumulator but increase cost, as we would extend
- // it to i64.
- if (VFMinValue != 8 || AccumEVT != MVT::i64)
- return Invalid;
- } else
- return Invalid;
+ // i8 -> i32 is natively supported for udot/sdot/usdot, both for NEON and SVE.
+ if (ST->isSVEorStreamingSVEAvailable() ||
+ (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
+ ST->hasDotProd())) {
+ if (AccumLT.second.getScalarType() == MVT::i32 &&
+ InputLT.second.getScalarType() == MVT::i8)
+ return Cost;
+ }
- return Cost;
+ // Add additional cost for the extends that would need to be inserted.
+ return Cost + 4;
}
InstructionCost
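A worked example of the rewritten cost model, using only the logic visible above (assumed: a NEON-only target with +dotprod, and TCC_Basic = 1). For a fixed VF of 16 with i8 inputs accumulating into i32: Ratio = 32 / 8 = 4 and 16 > 4, so the reduction is accepted; <16 x i8> legalizes to a single v16i8 part, giving Cost = 1; the input is already a full 128-bit vector, so the half-width doubling does not apply; and because the accumulator legalizes to i32 elements with an i8 input and dot-product available, the function returns 1. The same shape with an i64 accumulator matches none of the native udot/sdot cases and falls through to Cost + 4, i.e. 5, effectively steering the vectorizer toward a regular reduction instead.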
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7003a40..9446144 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2126,6 +2126,8 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureLdsBarrierArriveAtomic,
FeatureSetPrioIncWgInst,
Feature45BitNumRecordsBufferResource,
+ FeatureSupportsXNACK,
+ FeatureXNACK,
]>;
def FeatureISAVersion12_51 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 2ba3156..9dd64e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
-/// Returns true if the function requires the implicit argument be passed
-/// regardless of the function contents.
-static bool funcRequiresHostcallPtr(const Function &F) {
- // Sanitizers require the hostcall buffer passed in the implicit arguments.
+/// Returns true if sanitizer attributes are present on a function.
+static bool hasSanitizerAttributes(const Function &F) {
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
F.hasFnAttribute(Attribute::SanitizeMemory) ||
@@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// If the function requires the implicit arg pointer due to sanitizers,
// assume it's needed even if explicitly marked as not requiring it.
- const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
- if (NeedsHostcall) {
+ // Flat scratch initialization is needed because `asan_malloc_impl`
+ // calls introduced later in the pipeline will have flat scratch accesses.
+ // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
+ // implementation for `asan_malloc_impl` is updated.
+ const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
+ if (HasSanitizerAttrs) {
removeAssumedBits(IMPLICIT_ARG_PTR);
removeAssumedBits(HOSTCALL_PTR);
+ removeAssumedBits(FLAT_SCRATCH_INIT);
}
for (auto Attr : ImplicitAttrs) {
- if (NeedsHostcall &&
- (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
+ if (HasSanitizerAttrs &&
+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
+ Attr.first == FLAT_SCRATCH_INIT))
continue;
if (F->hasFnAttribute(Attr.second))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 6efa78e..a4ef524 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -608,8 +608,6 @@ public:
? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
: EmptySet;
- const size_t HybridModuleRootKernelsSize = HybridModuleRootKernels.size();
-
for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
// Each iteration of this loop assigns exactly one global variable to
// exactly one of the implementation strategies.
@@ -649,8 +647,7 @@ public:
ModuleScopeVariables.insert(GV);
} else if (K.second.size() == 1) {
KernelAccessVariables.insert(GV);
- } else if (K.second.size() == HybridModuleRootKernelsSize &&
- set_is_subset(K.second, HybridModuleRootKernels)) {
+ } else if (K.second == HybridModuleRootKernels) {
ModuleScopeVariables.insert(GV);
} else {
TableLookupVariables.insert(GV);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 2d5ae29..2120bf8 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -2303,7 +2303,10 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
if (!hasArchitectedFlatScratch())
KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
- KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
+ bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
+ assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
+ KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
+ << '\n';
KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index fed3778..82789bc 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -722,7 +722,8 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
return false;
}
- if (!MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
+ if (New->getReg().isVirtual() &&
+ !MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
LLVM_DEBUG(dbgs() << "Cannot constrain " << printReg(New->getReg(), TRI)
<< TRI->getRegClassName(ConstrainRC) << '\n');
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index e4b3528..0189e7b 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -306,7 +306,8 @@ class PrologEpilogSGPRSpillBuilder {
buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
TmpVGPR, FI, FrameReg, DwordOff);
- MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
+ assert(SubReg.isPhysical());
+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
.addReg(TmpVGPR, RegState::Kill);
DwordOff += 4;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f7265c5..e233457 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
return Flags;
}
-bool SITargetLowering::checkForPhysRegDependency(
- SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const {
- if (User->getOpcode() != ISD::CopyToReg)
- return false;
- if (!Def->isMachineOpcode())
- return false;
- MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def);
- if (!MDef)
- return false;
-
- unsigned ResNo = User->getOperand(Op).getResNo();
- if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1)
- return false;
- const MCInstrDesc &II = TII->get(MDef->getMachineOpcode());
- if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) {
- PhysReg = AMDGPU::SCC;
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
- Cost = RC->getCopyCost();
- return true;
- }
- return false;
-}
-
void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Instruction *AI) const {
// Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a474dab..74e58f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -561,11 +561,6 @@ public:
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
- bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- MCRegister &PhysReg, int &Cost) const override;
-
bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 76bfce8..5e27b37 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1013,6 +1013,15 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
}
}
} else if (T == X_CNT) {
+ WaitEventType OtherEvent = E == SMEM_GROUP ? VMEM_GROUP : SMEM_GROUP;
+ if (PendingEvents & (1 << OtherEvent)) {
+ // Hardware inserts an implicit xcnt between interleaved
+ // SMEM and VMEM operations, so there will never be
+ // outstanding address translations for both SMEM and
+ // VMEM at the same time.
+ setScoreLB(T, CurrScore - 1);
+ PendingEvents &= ~(1 << OtherEvent);
+ }
for (const MachineOperand &Op : Inst.all_uses())
setScoreByOperand(&Inst, Op, T, CurrScore);
} else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
@@ -2220,6 +2229,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
// Now look at the instruction opcode. If it is a memory access
// instruction, update the upper-bound of the appropriate counter's
// bracket and the destination operand scores.
+ // For architectures with X_CNT, mark the source address operands
+ // with the appropriate counter values.
// TODO: Use the (TSFlags & SIInstrFlags::DS_CNT) property everywhere.
bool IsVMEMAccess = false;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 56435a5..cda8069 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2112,8 +2112,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
MI.setDesc(get(AMDGPU::V_READLANE_B32));
- MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
- &AMDGPU::SReg_32_XM0RegClass);
break;
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
@@ -8117,21 +8115,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
// hope for the best.
if (Inst.isCopy() && DstReg.isPhysical() &&
RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
- // TODO: Only works for 32 bit registers.
- if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
- .add(Inst.getOperand(1));
- } else {
- Register NewDst =
- MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
- .add(Inst.getOperand(1));
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
- DstReg)
- .addReg(NewDst);
- }
+ Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
+ .add(Inst.getOperand(1));
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
+ DstReg)
+ .addReg(NewDst);
+
Inst.eraseFromParent();
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 205237f..3c2dd42 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2222,8 +2222,6 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
// Don't need to write VGPR out.
}
- MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
-
// Restore clobbered registers in the specified restore block.
MI = RestoreMBB.end();
SB.setMI(&RestoreMBB, MI);
@@ -2238,7 +2236,8 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
SB.NumSubRegs == 1
? SB.SuperReg
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
- MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
+
+ assert(SubReg.isPhysical());
bool LastSubReg = (i + 1 == e);
auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
SubReg)
@@ -3059,8 +3058,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (IsSALU && LiveSCC) {
Register NewDest;
if (IsCopy) {
- MF->getRegInfo().constrainRegClass(ResultReg,
- &AMDGPU::SReg_32_XM0RegClass);
+ assert(ResultReg.isPhysical());
NewDest = ResultReg;
} else {
NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
@@ -3190,8 +3188,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Register NewDest;
if (IsCopy) {
- MF->getRegInfo().constrainRegClass(ResultReg,
- &AMDGPU::SReg_32_XM0RegClass);
NewDest = ResultReg;
} else {
NewDest = RS->scavengeRegisterBackwards(
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 8f1dd62..5630580 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1163,6 +1163,22 @@ def VS_64_Lo256 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
let HasSGPR = 1;
let Size = 64;
}
+
+def VS_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
+ (add VReg_128, SReg_128)> {
+ let isAllocatable = 0;
+ let HasVGPR = 1;
+ let HasSGPR = 1;
+ let Size = 128;
+}
+
+def VS_128_Align2 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
+ (add VReg_128_Align2, SReg_128)> {
+ let isAllocatable = 0;
+ let HasVGPR = 1;
+ let HasSGPR = 1;
+ let Size = 128;
+}
} // End GeneratePressureSet = 0
// Define a register tuple class, along with one requiring an even
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index fa130a1..26ff54c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -775,6 +775,16 @@ class VectorType;
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
+ /// Return true if it is profitable to fold a mask into a variable shift pair.
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+ return false;
+
+ return VT.getScalarSizeInBits() <= 32;
+ }
+
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
unsigned SelectOpcode, SDValue X,
SDValue Y) const override;
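
The hook arbitrates between the two canonical ways of clearing the low y bits, x & (-1 << y) and (x >> y) << y; on ARM the variable-shift-pair form is preferred for scalars up to 32 bits. A standalone check of the equivalence the fold relies on:

#include <cassert>
#include <cstdint>

// Both forms clear the low 'y' bits of 'x'; the target hook picks which one
// the DAG combiner should canonicalize toward.
int main() {
  for (uint32_t y = 0; y < 32; ++y) {
    uint32_t x = 0xDEADBEEFu;
    assert((x & (~0u << y)) == ((x >> y) << y));
  }
  return 0;
}
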
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index d0dfa47..a94e131 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
@@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
// Boolean vectors.
@@ -449,6 +453,7 @@ HexagonTargetLowering::initializeHVXLowering() {
// Include cases that are not handled earlier
setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v32i1, Custom);
setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
}
@@ -2337,7 +2342,7 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
return ExpandHvxFpToInt(Op, DAG);
}
-// For vector type v32i1 uint_to_fp to v32f32:
+// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
// R1 = #1, R2 holds the v32i1 param
// V1 = vsplat(R1)
// V2 = vsplat(R2)
@@ -2464,7 +2469,7 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
MVT FpTy = ResTy.getVectorElementType();
- if (Op.getOpcode() == ISD::UINT_TO_FP) {
+ if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
return LowerHvxPred32ToFp(Op, DAG);
if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4cfbfca..7ddf996 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2860,8 +2860,7 @@ static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
EVT ResTy, unsigned first) {
unsigned NumElts = ResTy.getVectorNumElements();
- assert(first >= 0 &&
- first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
+ assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
Node->op_begin() + first + NumElts);
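
The dropped conjunct was a tautology: first is unsigned, so first >= 0 always holds (and typically draws a -Wtype-limits warning); only the upper-bound check carries information. Illustratively, with a hypothetical function name:

#include <cassert>

// 'first >= 0' is vacuously true for an unsigned parameter, so the assert
// reduces to the range check that actually matters.
void checkSubvector(unsigned first, unsigned NumElts, unsigned TotalElts) {
  assert(first + NumElts <= TotalElts && "subvector overruns the source");
}
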
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 395d2c4..662d3f6 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -629,7 +629,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
- G_FTANH})
+ G_FTANH, G_FMODF})
.libcallFor({s32, s64})
.libcallFor(ST.is64Bit(), {s128});
getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 7d4535a..b37b740 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
// If it's not a grouped vector register, it doesn't have a subregister, so
// the base register is just itself.
- if (BaseReg == RISCV::NoRegister)
+ if (!BaseReg.isValid())
BaseReg = Reg;
return BaseReg;
}
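
This and the RISC-V changes that follow swap comparisons against RISCV::NoRegister for Register()/isValid(): a default-constructed Register (or MCRegister) already encodes "no register", so the check is target-independent. A small sketch of the idiom:

#include "llvm/CodeGen/Register.h"
#include <cassert>

// A default-constructed llvm::Register denotes "no register", so isValid()
// replaces explicit comparisons against a target's NoRegister constant.
void demo() {
  llvm::Register None;
  assert(!None.isValid());
  llvm::Register V = llvm::Register::index2VirtReg(0);
  assert(V.isValid() && V.isVirtual());
}
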
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index af1ceb6..cf6f83a 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -110,16 +110,16 @@ def : StPat<truncstorei8, SB, GPR, i16>;
let Predicates = [HasAtomicLdSt] in {
// Prefer unsigned due to no c.lb in Zcb.
- def : LdPat<atomic_load_aext_8, LBU, i16>;
- def : LdPat<atomic_load_nonext_16, LH, i16>;
+ def : LdPat<relaxed_load<atomic_load_aext_8>, LBU, i16>;
+ def : LdPat<relaxed_load<atomic_load_nonext_16>, LH, i16>;
- def : StPat<atomic_store_8, SB, GPR, i16>;
- def : StPat<atomic_store_16, SH, GPR, i16>;
+ def : StPat<relaxed_store<atomic_store_8>, SB, GPR, i16>;
+ def : StPat<relaxed_store<atomic_store_16>, SH, GPR, i16>;
}
let Predicates = [HasAtomicLdSt, IsRV64] in {
// Load pattern is in RISCVInstrInfoA.td and shared with RV32.
- def : StPat<atomic_store_32, SW, GPR, i32>;
+ def : StPat<relaxed_store<atomic_store_32>, SW, GPR, i32>;
}
//===----------------------------------------------------------------------===//
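
The relaxed_load/relaxed_store wrappers restrict these patterns to monotonic (and weaker) orderings, which on RISC-V need no fences and can select directly to plain loads and stores. Roughly the C++-level behavior being matched:

#include <atomic>

// A relaxed atomic access carries no ordering obligation, so on RISC-V it
// can lower to an ordinary LBU/LH/SB/SH/SW without surrounding fences,
// which is what the relaxed_load/relaxed_store pattern wrappers encode.
short loadRelaxed(const std::atomic<short> &A) {
  return A.load(std::memory_order_relaxed);
}

void storeRelaxed(std::atomic<short> &A, short V) {
  A.store(V, std::memory_order_relaxed);
}
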
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 50649cf..dcce2d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -533,7 +533,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
- ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
+ ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP, ISD::FMODF},
MVT::f16, Promote);
// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6a6ead2..cf8d120 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
// All undefined passthrus should be $noreg: see
// RISCVDAGToDAGISel::doPeepholeNoRegPassThru
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
+ return !UseMO.getReg().isValid() || UseMO.isUndef();
}
/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
@@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
Register Reg = VLOp.getReg();
// Erase the AVL operand from the instruction.
- VLOp.setReg(RISCV::NoRegister);
+ VLOp.setReg(Register());
VLOp.setIsKill(false);
if (LIS) {
LiveInterval &LI = LIS->getInterval(Reg);
@@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
if (!MO.isReg() || !MO.getReg().isVirtual())
return;
Register OldVLReg = MO.getReg();
- MO.setReg(RISCV::NoRegister);
+ MO.setReg(Register());
if (LIS)
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1e6b04f8..7db4832 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
/*RestoreAfter=*/false, /*SpAdj=*/0,
/*AllowSpill=*/false);
- if (TmpGPR != RISCV::NoRegister)
+ if (TmpGPR.isValid())
RS->setRegUsed(TmpGPR);
else {
// The case when there is no scavenged register needs special handling.
@@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Invalid operand type for VL operand";
return false;
}
- if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
+ if (Op.isReg() && Op.getReg().isValid()) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index f8d33ae..54569b1 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
const MachineOperand &MOImm = MI.getOperand(2);
if (!MOImm.isImm())
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
int64_t Offset = MOImm.getImm();
int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
@@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
}
}
}
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
}
// Check all uses after FirstMI of the given register, keeping a vector of
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index ffba284..fdf9a4f 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
Register FalseReg = MI.getOperand(2).getReg();
if (TruePassthruReg != FalseReg) {
// If True's passthru is undef, see if we can change it to False
- if (TruePassthruReg != RISCV::NoRegister ||
+ if (TruePassthruReg.isValid() ||
!MRI->hasOneUse(MI.getOperand(3).getReg()) ||
!ensureDominates(MI.getOperand(2), *True))
return false;
@@ -467,7 +467,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) {
unsigned PassthruOpIdx = MI.getNumExplicitDefs();
if (HasPassthru) {
- if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(PassthruOpIdx).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
} else
MI.removeOperand(PassthruOpIdx);
@@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A,
bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
MachineInstr &Src) const {
assert(MO.getParent()->getParent() == Src.getParent());
- if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
+ if (!MO.isReg() || !MO.getReg().isValid())
return true;
MachineInstr *Def = MRI->getVRegDef(MO.getReg());
@@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
return false;
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
return false;
// If the input was a pseudo with a policy operand, we can give it a tail
@@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
// Src needs to have the same passthru as VMV_V_V
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
- if (SrcPassthru.getReg() != RISCV::NoRegister &&
+ if (SrcPassthru.getReg().isValid() &&
SrcPassthru.getReg() != Passthru.getReg())
return false;
@@ -672,7 +672,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (SrcPassthru.getReg() != Passthru.getReg()) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
- if (Passthru.getReg() != RISCV::NoRegister)
+ if (Passthru.getReg().isValid())
MRI->constrainRegClass(
Passthru.getReg(),
TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI));
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 6c19049..024030d 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -206,8 +206,8 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (!done)
--I;
- // skip debug instruction
- if (I->isDebugInstr())
+ // Skip meta instructions.
+ if (I->isMetaInstruction())
continue;
if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() ||
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 143c4c4..e7709ef 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -149,6 +149,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
});
}
+ getActionDefinitionsBuilder({G_UMIN, G_UMAX, G_SMIN, G_SMAX})
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .lower();
+
// integer addition/subtraction
getActionDefinitionsBuilder({G_ADD, G_SUB})
.legalFor({s8, s16, s32})
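
Widening first (to at least 32 bits) keeps the subsequent generic lowering legal: lower() expands each min/max into a compare feeding a select. A scalar model of the expansion, as an illustration rather than the X86-specific code:

#include <cstdint>

// G_UMIN lowering after widening: a G_ICMP with the matching predicate
// feeding a G_SELECT; the other three opcodes differ only in the predicate.
uint32_t uminLowered(uint32_t a, uint32_t b) {
  bool ult = a < b;   // G_ICMP ult a, b
  return ult ? a : b; // G_SELECT ult, a, b
}
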
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cda5568..931a10b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
- Mask = UnpackLoMask;
- else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
- Mask = UnpackHiMask;
+ if (!isSingleElementRepeatedMask(Mask)) {
+ if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
+ Mask = UnpackLoMask;
+ else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
+ Mask = UnpackHiMask;
+ }
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
@@ -45457,7 +45459,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
const SDLoc &DL,
const X86Subtarget &Subtarget) {
EVT SrcVT = Src.getValueType();
- if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
+ if (Subtarget.useSoftFloat() || !SrcVT.isSimple() ||
+ SrcVT.getScalarType() != MVT::i1)
return SDValue();
// Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
@@ -58134,6 +58137,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
return V;
+ // Prefer VSHLI to reduce uses; X86FixupInstTunings may revert this
+ // depending on the scheduler model. Limit multiple users to AVX+ targets
+ // to prevent introducing extra register moves.
+ if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL))
+ if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode()))
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(),
+ Op0, 1, DAG);
+
// Canonicalize hidden LEA pattern:
// Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
// iff c < 4
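
The new combine rewrites add x, x as a shift-left-by-one when the target has an immediate vector shift, which can drop a register use; lane-wise the two are identical, wraparound included:

#include <cassert>
#include <cstdint>

// add(x, x) and shl(x, 1) agree on every lane value, including overflow.
int main() {
  const uint32_t lanes[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t x : lanes)
    assert(x + x == x << 1);
  return 0;
}
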
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cf6d0ec..e1e24a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -318,18 +318,18 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
// * Single constant active lane -> store
// * Narrow width by halves excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
+ Value *StorePtr = II.getArgOperand(1);
+ Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
- if (ConstMask->isAllOnesValue()) {
- Value *StorePtr = II.getArgOperand(1);
- Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+ if (maskIsAllOneOrUndef(ConstMask)) {
StoreInst *S =
new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
S->copyMetadata(II);
@@ -389,7 +389,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
return nullptr;
// If the mask is all zeros, a scatter does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// Vector splat address -> scalar store
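
isNullValue()/isAllOnesValue() bail out as soon as any lane is undef; the maskIs*OrUndef helpers used here also accept undef lanes, since an undef lane may be assumed to take whichever value keeps the transform valid. A conceptual model of the check, a sketch rather than the actual helper implementation:

// Sketch of the undef-tolerant check: a mask lane that is undef can be
// chosen to agree with the rest, so only a known-true lane blocks treating
// the mask as all-zero.
enum class Lane { Zero, One, Undef };

static bool allZeroOrUndef(const Lane *Mask, unsigned NumLanes) {
  for (unsigned I = 0; I != NumLanes; ++I)
    if (Mask[I] == Lane::One)
      return false;
  return true;
}
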
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 26e17cc..b9b5b58 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2287,6 +2287,35 @@ bool GVNPass::processLoad(LoadInst *L) {
return true;
}
+// Attempt to process masked loads that load from masked stores with the
+// same mask.
+bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
+ if (!MD)
+ return false;
+ MemDepResult Dep = MD->getDependency(I);
+ Instruction *DepInst = Dep.getInst();
+ if (!DepInst || !Dep.isLocal() || !Dep.isDef())
+ return false;
+
+ Value *Mask = I->getOperand(2);
+ Value *Passthrough = I->getOperand(3);
+ Value *StoreVal;
+ if (!match(DepInst, m_MaskedStore(m_Value(StoreVal), m_Value(), m_Value(),
+ m_Specific(Mask))) ||
+ StoreVal->getType() != I->getType())
+ return false;
+
+ // Remove the load but generate a select for the passthrough
+ Value *OpToForward = llvm::SelectInst::Create(Mask, StoreVal, Passthrough, "",
+ I->getIterator());
+
+ ICF->removeUsersOf(I);
+ I->replaceAllUsesWith(OpToForward);
+ salvageAndRemoveInstruction(I);
+ ++NumGVNLoad;
+ return true;
+}
+
/// Return a pair whose first field is the value number of \p Exp and whose
/// second field indicates whether that value number is newly created.
std::pair<uint32_t, bool>
@@ -2734,6 +2763,10 @@ bool GVNPass::processInstruction(Instruction *I) {
return false;
}
+ if (match(I, m_Intrinsic<Intrinsic::masked_load>()) &&
+ processMaskedLoad(cast<IntrinsicInst>(I)))
+ return true;
+
// For conditional branches, we can perform simple conditional propagation on
// the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
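
When the store and load use the same mask, the loaded value is the stored vector on active lanes and the load's passthrough elsewhere; that is exactly a select on the mask, which is why the load can be replaced outright. The per-lane semantics, written out illustratively:

#include <cstddef>

// Lane-wise justification for the forwarding: after a masked store with the
// same mask, a masked load returns select(mask, stored, passthrough).
void forwardedMaskedLoad(const bool *Mask, const float *Stored,
                         const float *Passthrough, float *Result, size_t N) {
  for (size_t I = 0; I != N; ++I)
    Result[I] = Mask[I] ? Stored[I] : Passthrough[I];
}
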
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7750687..cb6bfb2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8694,7 +8694,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
Plan->addVF(VF);
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
- Plan,
+ *Plan,
[this](PHINode *P) {
return Legal->getIntOrFpInductionDescriptor(P);
},
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f76777b..ca63bf3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -45,13 +45,13 @@ static cl::opt<bool> EnableWideActiveLaneMask(
cl::desc("Enable use of wide get active lane mask instructions"));
bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
- VPlanPtr &Plan,
+ VPlan &Plan,
function_ref<const InductionDescriptor *(PHINode *)>
GetIntOrFpInductionDescriptor,
const TargetLibraryInfo &TLI) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
- Plan->getVectorLoopRegion());
+ Plan.getVectorLoopRegion());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Skip blocks outside the region.
if (!VPBB->getParent())
@@ -77,11 +77,11 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
for (VPValue *Op : PhiR->operands())
NewRecipe->addOperand(Op);
} else {
- VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
+ VPValue *Start = Plan.getOrAddLiveIn(II->getStartValue());
VPValue *Step =
- vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep());
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
NewRecipe = new VPWidenIntOrFpInductionRecipe(
- Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc());
+ Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc());
}
} else {
assert(isa<VPInstruction>(&Ingredient) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 4c65cb7..2f00e51 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -138,7 +138,7 @@ struct VPlanTransforms {
/// widen recipes. Returns false if any VPInstructions could not be converted
/// to a wide recipe if needed.
LLVM_ABI_FOR_TEST static bool tryToConvertVPInstructionsToVPRecipes(
- VPlanPtr &Plan,
+ VPlan &Plan,
function_ref<const InductionDescriptor *(PHINode *)>
GetIntOrFpInductionDescriptor,
const TargetLibraryInfo &TLI);