Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp           | 108
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp                   |  23
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp      |   7
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MILexer.cpp                |   1
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MILexer.h                  |   1
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIParser.cpp               |   5
-rw-r--r--  llvm/lib/CodeGen/MIRPrinter.cpp                       |   2
-rw-r--r--  llvm/lib/CodeGen/MachineFunction.cpp                  |  20
-rw-r--r--  llvm/lib/CodeGen/MachineInstr.cpp                     |   6
-rw-r--r--  llvm/lib/CodeGen/MachineScheduler.cpp                 |  66
-rw-r--r--  llvm/lib/CodeGen/ModuloSchedule.cpp                   |   2
-rw-r--r--  llvm/lib/CodeGen/RegAllocBase.cpp                     |   4
-rw-r--r--  llvm/lib/CodeGen/RegisterCoalescer.cpp                | 179
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp         | 173
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp  |   3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp        |  83
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp      |   2
-rw-r--r--  llvm/lib/CodeGen/TailDuplicator.cpp                   |  18
-rw-r--r--  llvm/lib/CodeGen/TargetInstrInfo.cpp                  |   2
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp     |  14
20 files changed, 620 insertions, 99 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 6166271..1641c3e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1654,6 +1654,88 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
*StackUsageStream << "static\n";
}
+/// Extracts a generalized numeric type identifier of a Function's type from
+/// type metadata. Returns null if metadata cannot be found.
+static ConstantInt *extractNumericCGTypeId(const Function &F) {
+ SmallVector<MDNode *, 2> Types;
+ F.getMetadata(LLVMContext::MD_type, Types);
+ for (const auto &Type : Types) {
+ if (Type->hasGeneralizedMDString()) {
+ MDString *MDGeneralizedTypeId = cast<MDString>(Type->getOperand(1));
+ uint64_t TypeIdVal = llvm::MD5Hash(MDGeneralizedTypeId->getString());
+ IntegerType *Int64Ty = Type::getInt64Ty(F.getContext());
+ return ConstantInt::get(Int64Ty, TypeIdVal);
+ }
+ }
+ return nullptr;
+}
+
+/// Emits the .callgraph section.
+void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
+ FunctionInfo &FuncInfo) {
+ if (!MF.getTarget().Options.EmitCallGraphSection)
+ return;
+
+ // Switch to the call graph section for the function
+ MCSection *FuncCGSection =
+ getObjFileLowering().getCallGraphSection(*getCurrentSection());
+ assert(FuncCGSection && "null callgraph section");
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(FuncCGSection);
+
+ // Emit format version number.
+ OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0);
+
+ // Emit function's self information, which is composed of:
+ // 1) FunctionEntryPc
+ // 2) FunctionKind: Whether the function is an indirect target, and if so,
+ // whether its type id is known.
+ // 3) FunctionTypeId: Emit only when the function is an indirect target
+ // and its type id is known.
+
+ // Emit function entry pc.
+ const MCSymbol *FunctionSymbol = getFunctionBegin();
+ OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+
+ // If this function has external linkage or has its address taken and
+ // it is not a callback, then anything could call it.
+ const Function &F = MF.getFunction();
+ bool IsIndirectTarget =
+ !F.hasLocalLinkage() || F.hasAddressTaken(nullptr,
+ /*IgnoreCallbackUses=*/true,
+ /*IgnoreAssumeLikeCalls=*/true,
+ /*IgnoreLLVMUsed=*/false);
+
+ // FIXME: FunctionKind takes only a few values but is emitted as a 64-bit value.
+ // Can be optimized to occupy 2 bits instead.
+ // Emit function kind, and type id if available.
+ if (!IsIndirectTarget) {
+ OutStreamer->emitInt64(
+ static_cast<uint64_t>(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET));
+ } else {
+ if (const auto *TypeId = extractNumericCGTypeId(F)) {
+ OutStreamer->emitInt64(static_cast<uint64_t>(
+ FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID));
+ OutStreamer->emitInt64(TypeId->getZExtValue());
+ } else {
+ OutStreamer->emitInt64(static_cast<uint64_t>(
+ FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
+ }
+ }
+
+ // Emit callsite labels, where each element is a pair of type id and
+ // indirect callsite pc.
+ const auto &CallSiteLabels = FuncInfo.CallSiteLabels;
+ OutStreamer->emitInt64(CallSiteLabels.size());
+ for (const auto &[TypeId, Label] : CallSiteLabels) {
+ OutStreamer->emitInt64(TypeId);
+ OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize());
+ }
+ FuncInfo.CallSiteLabels.clear();
+
+ OutStreamer->popSection();
+}
+
void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
const MDNode &MD) {
MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
@@ -1784,6 +1866,23 @@ static StringRef getMIMnemonic(const MachineInstr &MI, MCStreamer &Streamer) {
return Name;
}
+void AsmPrinter::emitIndirectCalleeLabels(
+ FunctionInfo &FuncInfo,
+ const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
+ const MachineInstr &MI) {
+ // Only indirect calls have type identifiers set.
+ const auto &CallSiteInfo = CallSitesInfoMap.find(&MI);
+ if (CallSiteInfo == CallSitesInfoMap.end())
+ return;
+
+ for (ConstantInt *CalleeTypeId : CallSiteInfo->second.CalleeTypeIds) {
+ MCSymbol *S = MF->getContext().createTempSymbol();
+ OutStreamer->emitLabel(S);
+ uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue();
+ FuncInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S);
+ }
+}
+
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::emitFunctionBody() {
@@ -1830,6 +1929,8 @@ void AsmPrinter::emitFunctionBody() {
MBBSectionRanges[MF->front().getSectionID()] =
MBBSectionRange{CurrentFnBegin, nullptr};
+ FunctionInfo FuncInfo;
+ const auto &CallSitesInfoMap = MF->getCallSitesInfo();
for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
@@ -1963,6 +2064,9 @@ void AsmPrinter::emitFunctionBody() {
break;
}
+ if (TM.Options.EmitCallGraphSection && MI.isCall())
+ emitIndirectCalleeLabels(FuncInfo, CallSitesInfoMap, MI);
+
// If there is a post-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPostInstrSymbol())
OutStreamer->emitLabel(S);
@@ -2142,6 +2246,9 @@ void AsmPrinter::emitFunctionBody() {
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
+ // Emit section containing call graph metadata.
+ emitCallGraphSection(*MF, FuncInfo);
+
// Emit .su file containing function stack size information.
emitStackUsage(*MF);
@@ -2841,6 +2948,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
F.hasFnAttribute("xray-instruction-threshold") ||
needFuncLabels(MF, *this) || NeedsLocalForSize ||
MF.getTarget().Options.EmitStackSizeSection ||
+ MF.getTarget().Options.EmitCallGraphSection ||
MF.getTarget().Options.BBAddrMap) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
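For readers skimming the AsmPrinter hunk above: each function's .callgraph record is written as the format version, the function entry PC, the function kind, an optional type id (only when the function is an indirect target with a known type id), the number of call sites, and then (type id, call-site PC) pairs. The standalone C++ sketch below only mirrors that ordering; the enum values, struct, and helper names are illustrative and not LLVM API.

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Hypothetical stand-ins for the emitted fields; assumes 64-bit pointers so
    // that symbol values and int64 fields fit in one vector of 64-bit words.
    enum class FuncKind : uint64_t {
      NotIndirectTarget,
      IndirectTargetUnknownTid,
      IndirectTargetKnownTid,
    };

    struct CallGraphRecord {
      uint64_t FormatVersion = 0;
      uint64_t EntryPC = 0; // value of the function-begin symbol
      FuncKind Kind = FuncKind::NotIndirectTarget;
      uint64_t TypeId = 0;  // meaningful only for IndirectTargetKnownTid
      std::vector<std::pair<uint64_t, uint64_t>> CallSites; // (type id, call-site PC)
    };

    // Append one record in the same order emitCallGraphSection() emits it.
    void appendRecord(std::vector<uint64_t> &Out, const CallGraphRecord &R) {
      Out.push_back(R.FormatVersion);
      Out.push_back(R.EntryPC);
      Out.push_back(static_cast<uint64_t>(R.Kind));
      if (R.Kind == FuncKind::IndirectTargetKnownTid)
        Out.push_back(R.TypeId);
      Out.push_back(R.CallSites.size());
      for (const auto &[Tid, PC] : R.CallSites) {
        Out.push_back(Tid);
        Out.push_back(PC);
      }
    }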
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 416c56d..f16283b 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2769,6 +2769,29 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
return optimizeGatherScatterInst(II, II->getArgOperand(0));
case Intrinsic::masked_scatter:
return optimizeGatherScatterInst(II, II->getArgOperand(1));
+ case Intrinsic::masked_load:
+ // Treat v1X masked load as load X type.
+ if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
+ if (VT->getNumElements() == 1) {
+ Value *PtrVal = II->getArgOperand(0);
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+ return true;
+ }
+ }
+ return false;
+ case Intrinsic::masked_store:
+ // Treat v1X masked store as store X type.
+ if (auto *VT =
+ dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ if (VT->getNumElements() == 1) {
+ Value *PtrVal = II->getArgOperand(1);
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+ return true;
+ }
+ }
+ return false;
}
SmallVector<Value *, 2> PtrOps;
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 9aa8deb..27df7e3 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -211,7 +211,9 @@ MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
MachineInstrBuilder MachineIRBuilder::buildObjectPtrOffset(const DstOp &Res,
const SrcOp &Op0,
const SrcOp &Op1) {
- return buildPtrAdd(Res, Op0, Op1, MachineInstr::MIFlag::NoUWrap);
+ return buildPtrAdd(Res, Op0, Op1,
+ MachineInstr::MIFlag::NoUWrap |
+ MachineInstr::MIFlag::InBounds);
}
std::optional<MachineInstrBuilder>
@@ -234,7 +236,8 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0,
std::optional<MachineInstrBuilder> MachineIRBuilder::materializeObjectPtrOffset(
Register &Res, Register Op0, const LLT ValueTy, uint64_t Value) {
return materializePtrAdd(Res, Op0, ValueTy, Value,
- MachineInstr::MIFlag::NoUWrap);
+ MachineInstr::MIFlag::NoUWrap |
+ MachineInstr::MIFlag::InBounds);
}
MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res,
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 193df1f..8b72c29 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -217,6 +217,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nneg", MIToken::kw_nneg)
.Case("disjoint", MIToken::kw_disjoint)
.Case("samesign", MIToken::kw_samesign)
+ .Case("inbounds", MIToken::kw_inbounds)
.Case("nofpexcept", MIToken::kw_nofpexcept)
.Case("unpredictable", MIToken::kw_unpredictable)
.Case("debug-location", MIToken::kw_debug_location)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index 54142ac..0627f17 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -78,6 +78,7 @@ struct MIToken {
kw_nneg,
kw_disjoint,
kw_samesign,
+ kw_inbounds,
kw_debug_location,
kw_debug_instr_number,
kw_dbg_instr_ref,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 807d59c..6a464d9 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1477,7 +1477,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_nneg) ||
Token.is(MIToken::kw_disjoint) ||
Token.is(MIToken::kw_nusw) ||
- Token.is(MIToken::kw_samesign)) {
+ Token.is(MIToken::kw_samesign) ||
+ Token.is(MIToken::kw_inbounds)) {
// clang-format on
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
@@ -1518,6 +1519,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::NoUSWrap;
if (Token.is(MIToken::kw_samesign))
Flags |= MachineInstr::SameSign;
+ if (Token.is(MIToken::kw_inbounds))
+ Flags |= MachineInstr::InBounds;
lex();
}
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index ad7835a..ce1834a 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -820,6 +820,8 @@ static void printMI(raw_ostream &OS, MFPrintState &State,
OS << "nusw ";
if (MI.getFlag(MachineInstr::SameSign))
OS << "samesign ";
+ if (MI.getFlag(MachineInstr::InBounds))
+ OS << "inbounds ";
// NOTE: Please add new MIFlags also to the MI_FLAGS_STR in
// llvm/utils/update_mir_test_checks.py.
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 60d42e0..ec40f6a 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -698,6 +698,26 @@ bool MachineFunction::needsFrameMoves() const {
!F.getParent()->debug_compile_units().empty();
}
+MachineFunction::CallSiteInfo::CallSiteInfo(const CallBase &CB) {
+ // Numeric callee_type ids are only for indirect calls.
+ if (!CB.isIndirectCall())
+ return;
+
+ MDNode *CalleeTypeList = CB.getMetadata(LLVMContext::MD_callee_type);
+ if (!CalleeTypeList)
+ return;
+
+ for (const MDOperand &Op : CalleeTypeList->operands()) {
+ MDNode *TypeMD = cast<MDNode>(Op);
+ MDString *TypeIdStr = cast<MDString>(TypeMD->getOperand(1));
+ // Compute numeric type id from generalized type id string
+ uint64_t TypeIdVal = MD5Hash(TypeIdStr->getString());
+ IntegerType *Int64Ty = Type::getInt64Ty(CB.getContext());
+ CalleeTypeIds.push_back(
+ ConstantInt::get(Int64Ty, TypeIdVal, /*IsSigned=*/false));
+ }
+}
+
namespace llvm {
template<>
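The CalleeTypeIds collected above are 64-bit MD5 hashes of the generalized type id strings carried by the call's callee_type metadata, matching the hashing done in extractNumericCGTypeId in the AsmPrinter hunk. A minimal standalone use of llvm::MD5Hash (the example string is hypothetical):

    #include "llvm/Support/MD5.h"
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical generalized type id string, as found in type /
      // callee_type metadata; the numeric type id is its 64-bit MD5 hash.
      uint64_t TypeId = llvm::MD5Hash("_ZTSFiPvE.generalized");
      std::printf("type id: %llu\n", static_cast<unsigned long long>(TypeId));
      return 0;
    }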
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index da3665b..79047f7 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -585,6 +585,8 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
MIFlags |= MachineInstr::MIFlag::NoUSWrap;
if (GEP->hasNoUnsignedWrap())
MIFlags |= MachineInstr::MIFlag::NoUWrap;
+ if (GEP->isInBounds())
+ MIFlags |= MachineInstr::MIFlag::InBounds;
}
// Copy the nonneg flag.
@@ -1860,8 +1862,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "nneg ";
if (getFlag(MachineInstr::Disjoint))
OS << "disjoint ";
+ if (getFlag(MachineInstr::NoUSWrap))
+ OS << "nusw ";
if (getFlag(MachineInstr::SameSign))
OS << "samesign ";
+ if (getFlag(MachineInstr::InBounds))
+ OS << "inbounds ";
// Print the opcode name.
if (TII)
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 9d5c39c..c6fa8f4 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -3676,8 +3676,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
TopCand.SU = nullptr;
BotCand.SU = nullptr;
- TopCluster = nullptr;
- BotCluster = nullptr;
+ TopClusterID = InvalidClusterId;
+ BotClusterID = InvalidClusterId;
}
/// Initialize the per-region scheduling policy.
@@ -3988,10 +3988,14 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
@@ -4251,24 +4255,30 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
- TopCluster = DAG->getCluster(SU->ParentClusterIdx);
- LLVM_DEBUG(if (TopCluster) {
- dbgs() << " Top Cluster: ";
- for (auto *N : *TopCluster)
- dbgs() << N->NodeNum << '\t';
- dbgs() << '\n';
+ TopClusterID = SU->ParentClusterIdx;
+ LLVM_DEBUG({
+ if (TopClusterID != InvalidClusterId) {
+ ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
+ dbgs() << " Top Cluster: ";
+ for (auto *N : *TopCluster)
+ dbgs() << N->NodeNum << '\t';
+ dbgs() << '\n';
+ }
});
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
reschedulePhysReg(SU, true);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
- BotCluster = DAG->getCluster(SU->ParentClusterIdx);
- LLVM_DEBUG(if (BotCluster) {
- dbgs() << " Bot Cluster: ";
- for (auto *N : *BotCluster)
- dbgs() << N->NodeNum << '\t';
- dbgs() << '\n';
+ BotClusterID = SU->ParentClusterIdx;
+ LLVM_DEBUG({
+ if (BotClusterID != InvalidClusterId) {
+ ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
+ dbgs() << " Bot Cluster: ";
+ for (auto *N : *BotCluster)
+ dbgs() << N->NodeNum << '\t';
+ dbgs() << '\n';
+ }
});
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
@@ -4306,8 +4316,8 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
if (!Bot.HazardRec) {
Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
}
- TopCluster = nullptr;
- BotCluster = nullptr;
+ TopClusterID = InvalidClusterId;
+ BotClusterID = InvalidClusterId;
}
void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
@@ -4373,10 +4383,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
return TryCand.Reason != NoCand;
// Keep clustered nodes together.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
// Avoid critical resource consumption and balance the schedule.
@@ -4575,11 +4589,11 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
- TopCluster = DAG->getCluster(SU->ParentClusterIdx);
+ TopClusterID = SU->ParentClusterIdx;
Top.bumpNode(SU);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
- BotCluster = DAG->getCluster(SU->ParentClusterIdx);
+ BotClusterID = SU->ParentClusterIdx;
Bot.bumpNode(SU);
}
}
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 0f742c4..21bf052 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -423,7 +423,7 @@ void ModuloScheduleExpander::generateExistingPhis(
// potentially define two values.
unsigned MaxPhis = PrologStage + 2;
if (!InKernel && (int)PrologStage <= LoopValStage)
- MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
+ MaxPhis = std::max((int)MaxPhis - LoopValStage, 1);
unsigned NumPhis = std::min(NumStages, MaxPhis);
Register NewReg;
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index 69b9291..2400a1f 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -178,10 +178,8 @@ void RegAllocBase::cleanupFailedVReg(Register FailedReg, MCRegister PhysReg,
for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
++Aliases) {
for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
- if (MO.readsReg()) {
+ if (MO.readsReg())
MO.setIsUndef(true);
- LIS->removeAllRegUnitsForPhysReg(MO.getReg());
- }
}
}
}
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 2d7987a..7ede564 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -306,7 +306,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+ ///
+ /// If \p SubregToRegSrcInst is not empty, we are coalescing a
+ /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
+ /// implicit-def of DstReg on instructions that define SrcReg.
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
+ ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1443,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
+ LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
@@ -1457,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
+ if (MO.isDef() && MO.getReg().isVirtual() &&
+ MRI->shouldTrackSubRegLiveness(DstReg))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
@@ -1499,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());
- // We're only expecting another def of the main output, so the range
- // should get updated with the regular output range.
- //
- // FIXME: The range updating below probably needs updating to look at
- // the super register if subranges are tracked.
- assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
- "subrange update for implicit-def of super register may not be "
- "properly handled");
+ // If lanemasks need to be tracked, compute the lanemask of the NewMI
+ // implicit def operands to prevent subranges for the super-regs from
+ // being removed by code later on in this function.
+ if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
}
@@ -1606,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
- if ((SR.LaneMask & DstMask).none()) {
+ if ((SR.LaneMask & DstMask).none() &&
+ (SR.LaneMask & NewMIImplicitOpsMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1870,11 +1877,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}
-void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
- unsigned SubIdx) {
+void RegisterCoalescer::updateRegDefsUses(
+ Register SrcReg, Register DstReg, unsigned SubIdx,
+ ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
+ // Coalescing a COPY may expose reads of 'undef' subregisters.
+ // If so, then explicitly propagate 'undef' to those operands.
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
if (MO.isUndef())
@@ -1891,6 +1901,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
}
}
+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
+ // create duplicate subranges when we update the interval for unused lanes.
+ LaneBitmask DstIntLaneMask;
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ for (LiveInterval::SubRange &SR : DstInt->subranges())
+ DstIntLaneMask |= SR.LaneMask;
+ }
+
+ // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
SmallPtrSet<MachineInstr *, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
E = MRI->reg_instr_end();
@@ -1914,6 +1933,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
+ bool RequiresImplicitRedef = false;
+ if (!SubregToRegSrcInsts.empty()) {
+ // We can only add an implicit-def and undef if the sub registers match,
+ // e.g.
+ // %0:gr32 = INSTX
+ // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
+ // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
+ //
+ // This cannot be transformed into:
+ // %1.sub32:gr64 = INSTX
+ // undef %1.sub8:gr64 = INSTY , implicit-def %1
+ //
+ // Because that would thrash the top 24 bits of %1.sub32.
+ if (is_contained(SubregToRegSrcInsts, UseMI) &&
+ all_of(UseMI->defs(),
+ [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
+ if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef())
+ return true;
+ return SubIdx == MO.getSubReg();
+ })) {
+ // Add implicit-def of super-register to express that the whole
+ // register is defined by the instruction.
+ MachineInstrBuilder MIB(*MF, UseMI);
+ MIB.addReg(DstReg, RegState::ImplicitDefine);
+ RequiresImplicitRedef = true;
+ }
+
+ // If the coalesced instruction doesn't fully define the register, we need
+ // to preserve the original super register liveness for SUBREG_TO_REG.
+ //
+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
+ // but it introduces liveness for other subregisters. Downstream users may
+ // have been relying on those bits, so we need to ensure their liveness is
+ // captured with a def of other lanes.
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ // First check if there is sufficient granularity in terms of subranges.
+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
+ if ((UnusedLanes & ~DstIntLaneMask).any()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
+ DstIntLaneMask |= UnusedLanes;
+ }
+
+ // After duplicating the live ranges for the low/hi bits, we
+ // need to update the subranges of the DstReg interval such that
+ // for a case like this:
+ //
+ // entry:
+ // 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
+ // :
+ // if.then:
+ // 32B %1:gpr32 = MOVIMM32 ..
+ // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
+ //
+ // Only the MOVIMM32 requires a def of the top lanes, and any intervals
+ // for the top 32-bits of the def at 16B should be removed.
+ for (LiveInterval::SubRange &SR : DstInt->subranges()) {
+ if (!Writes || RequiresImplicitRedef ||
+ (SR.LaneMask & UnusedLanes).none())
+ continue;
+
+ assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
+ "Unexpected lanemask. Subrange needs finer granularity");
+
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false);
+ auto SegmentI = SR.find(UseIdx);
+ if (SegmentI != SR.end())
+ SR.removeSegment(SegmentI, true);
+ }
+ }
+ }
+
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
@@ -1922,7 +2015,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// turn a full def into a read-modify-write sub-register def and vice
// versa.
if (SubIdx && MO.isDef())
- MO.setIsUndef(!Reads);
+ MO.setIsUndef(!Reads || RequiresImplicitRedef);
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
@@ -2025,6 +2118,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
LIS->shrinkToUses(&LI);
}
+/// For a given use of value \p Idx, collect into \p Instrs the def in the
+/// current block, or otherwise all possible defs in preceding blocks.
+static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
+ SmallVector<MachineInstr *> &Instrs,
+ LiveIntervals *LIS, LiveInterval &SrcInt,
+ MachineBasicBlock *MBB, VNInfo *Idx) {
+ if (!Idx->isPHIDef()) {
+ MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
+ assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
+ Instrs.push_back(Def);
+ return true;
+ }
+
+ bool Any = false;
+ if (VisitedBlocks.count(MBB))
+ return false;
+ VisitedBlocks.insert(MBB);
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred,
+ SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred)));
+ }
+ return Any;
+}
+
bool RegisterCoalescer::joinCopy(
MachineInstr *CopyMI, bool &Again,
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2156,6 +2273,35 @@ bool RegisterCoalescer::joinCopy(
});
}
+ SmallVector<MachineInstr *> SubregToRegSrcInsts;
+ if (CopyMI->isSubregToReg()) {
+ // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
+ //
+ // %0:gpr32 = movimm32 ..
+ // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
+ // ...
+ // %0:gpr32 = COPY <something>
+ //
+ // After joining liveranges, the original `movimm32` will need an
+ // implicit-def to make it explicit that the entire register is written,
+ // i.e.
+ //
+ // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
+ // ...
+ // undef %0.sub32:gpr64 = COPY <something> // Note that this does not
+ // // require an implicit-def,
+ // // because it has nothing to
+ // // do with the SUBREG_TO_REG.
+ LiveInterval &SrcInt =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
+ if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
+ CopyMI->getParent(),
+ SrcInt.Query(SubregToRegSlotIdx).valueIn()))
+ llvm_unreachable("SUBREG_TO_REG src requires a def");
+ }
+
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2225,9 +2371,12 @@ bool RegisterCoalescer::joinCopy(
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx())
+ if (CP.getDstIdx()) {
+ assert(SubregToRegSrcInsts.empty() && "can this happen?");
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+ }
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
+ SubregToRegSrcInsts);
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
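As a rough illustration of the lane-mask bookkeeping added to updateRegDefsUses, the sketch below redoes the UnusedLanes computation with plain integers in place of LLVM's LaneBitmask; all mask values are hypothetical.

    #include <cassert>
    #include <cstdint>

    int main() {
      // Assume a 64-bit vreg whose sub32 lanes have mask 0x3 and whose
      // remaining (high) lanes have mask 0xC; values are illustrative only.
      const uint64_t DstMask = 0xF;       // getMaxLaneMaskForVReg(DstReg)
      const uint64_t UsedLanes = 0x3;     // getSubRegIndexLaneMask(sub32)
      uint64_t DstIntLaneMask = 0x3;      // lanes already covered by subranges

      const uint64_t UnusedLanes = DstMask & ~UsedLanes; // the high lanes, 0xC
      if ((UnusedLanes & ~DstIntLaneMask) != 0) {
        // No existing subrange covers the high lanes, so the coalescer creates
        // one; this keeps the lanes SUBREG_TO_REG implicitly defines alive.
        DstIntLaneMask |= UnusedLanes;
      }
      assert(DstIntLaneMask == 0xF);
      return 0;
    }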
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 20b96f5..5989c1d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -331,6 +331,11 @@ namespace {
return CombineTo(N, To, 2, AddTo);
}
+ SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
+ bool AddTo = true) {
+ return CombineTo(N, To->data(), To->size(), AddTo);
+ }
+
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
@@ -541,6 +546,7 @@ namespace {
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitVECTOR_INTERLEAVE(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitSCALAR_TO_VECTOR(SDNode *N);
@@ -2021,6 +2027,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
@@ -4100,18 +4107,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
if (N1.hasOneUse() && hasUMin(VT)) {
SDValue Y;
- if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETULT)),
- m_Zero(), m_Deferred(Y))) ||
- sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETUGE)),
- m_Deferred(Y), m_Zero())) ||
- sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETULT)),
- m_Zero(), m_Deferred(Y))) ||
- sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETUGE)),
- m_Deferred(Y), m_Zero())))
+ auto MS0 = m_Specific(N0);
+ auto MVY = m_Value(Y);
+ auto MZ = m_Zero();
+ auto MCC1 = m_SpecificCondCode(ISD::SETULT);
+ auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
+
+ if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
+ sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
+ sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
+ sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
+
return DAG.getNode(ISD::UMIN, DL, VT, N0,
DAG.getNode(ISD::SUB, DL, VT, N0, Y));
}
@@ -10616,6 +10622,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getVScale(DL, VT, C0 << C1);
}
+ SDValue X;
+ APInt VS0;
+
+ // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
+ if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
+ N0->getFlags().hasNoUnsignedWrap());
+
+ SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
+ return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
+ }
+
// Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
APInt ShlVal;
if (N0.getOpcode() == ISD::STEP_VECTOR &&
@@ -25282,6 +25301,28 @@ static SDValue combineConcatVectorOfShuffleAndItsOperands(
return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
}
+static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalTypes,
+ bool LegalOperations) {
+ EVT VT = N->getValueType(0);
+
+ // Post-legalization we can only create wider SPLAT_VECTOR operations if both
+ // the type and operation are legal. The Hexagon target has custom
+ // legalization for SPLAT_VECTOR that splits the operation into two parts and
+ // concatenates them. Therefore, custom lowering must also be rejected in
+ // order to avoid an infinite loop.
+ if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
+ (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
+ return SDValue();
+
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
+}
+
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
@@ -25405,6 +25446,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
+ if (SDValue V =
+ combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
+ return V;
+
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
// FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
@@ -25473,6 +25518,21 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
+ // Check to see if all operands are identical.
+ if (!llvm::all_equal(N->op_values()))
+ return SDValue();
+
+ // Check to see if the identical operand is a splat.
+ if (!DAG.isSplatValue(N->getOperand(0)))
+ return SDValue();
+
+ // interleave splat(X), splat(X).... --> splat(X), splat(X)....
+ SmallVector<SDValue, 4> Ops;
+ Ops.append(N->op_values().begin(), N->op_values().end());
+ return CombineTo(N, &Ops);
+}
+
// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
// if the subvector can be sourced for free.
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
@@ -28982,13 +29042,100 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // Match a pattern such as:
+ // (X | (X >> C0) | (X >> C1) | ...) & Mask
+ // This extracts contiguous parts of X and ORs them together before comparing.
+ // We can optimize this so that we directly check (X & SomeMask) instead,
+ // eliminating the shifts.
+
+ EVT VT = Root.getValueType();
+
+ // TODO: Support vectors?
+ if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
+ return SDValue();
+
+ SDValue N0 = Root.getOperand(0);
+ SDValue N1 = Root.getOperand(1);
+
+ if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
+ return SDValue();
+
+ APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
+
+ SDValue Src;
+ const auto IsSrc = [&](SDValue V) {
+ if (!Src) {
+ Src = V;
+ return true;
+ }
+
+ return Src == V;
+ };
+
+ SmallVector<SDValue> Worklist = {N0};
+ APInt PartsMask(VT.getSizeInBits(), 0);
+ while (!Worklist.empty()) {
+ SDValue V = Worklist.pop_back_val();
+ if (!V.hasOneUse() && (Src && Src != V))
+ return SDValue();
+
+ if (V.getOpcode() == ISD::OR) {
+ Worklist.push_back(V.getOperand(0));
+ Worklist.push_back(V.getOperand(1));
+ continue;
+ }
+
+ if (V.getOpcode() == ISD::SRL) {
+ SDValue ShiftSrc = V.getOperand(0);
+ SDValue ShiftAmt = V.getOperand(1);
+
+ if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
+ return SDValue();
+
+ auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
+ if (ShiftAmtVal > RootMask.getBitWidth())
+ return SDValue();
+
+ PartsMask |= (RootMask << ShiftAmtVal);
+ continue;
+ }
+
+ if (IsSrc(V)) {
+ PartsMask |= RootMask;
+ continue;
+ }
+
+ return SDValue();
+ }
+
+ if (!Src)
+ return SDValue();
+
+ SDLoc DL(Root);
+ return DAG.getNode(ISD::AND, DL, VT,
+ {Src, DAG.getConstant(PartsMask, DL, VT)});
+}
+
/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, const SDLoc &DL,
bool foldBooleans) {
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
- return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+ if (SDValue C =
+ TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
+ return C;
+
+ if (ISD::isIntEqualitySetCC(Cond) && N0.getOpcode() == ISD::AND &&
+ isNullConstant(N1)) {
+
+ if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
+ return DAG.getSetCC(DL, VT, Res, N1, Cond);
+ }
+
+ return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant, return
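The new matchMergedBFX fold rests on a zero-test identity: when every operand of the OR tree is either X or X right-shifted by a constant, masking the OR with Mask and comparing to zero is the same as testing X against the union of Mask shifted left by each shift amount. A self-contained check of that identity with arbitrarily chosen constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Mask = 0xFF;
      const unsigned C0 = 8, C1 = 16;
      const uint32_t PartsMask = Mask | (Mask << C0) | (Mask << C1);

      // Sample a spread of 32-bit values and check that both forms agree.
      for (uint32_t I = 0; I < (1u << 20); ++I) {
        uint32_t V = I * 2654435761u; // Knuth multiplicative scatter
        bool Orig = ((V | (V >> C0) | (V >> C1)) & Mask) == 0;
        bool Folded = (V & PartsMask) == 0;
        assert(Orig == Folded);
      }
      return 0;
    }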
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 6a2e782..31e7855 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -888,7 +888,8 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
if (MI->isCandidateForAdditionalCallInfo()) {
- if (DAG->getTarget().Options.EmitCallSiteInfo)
+ if (DAG->getTarget().Options.EmitCallSiteInfo ||
+ DAG->getTarget().Options.EmitCallGraphSection)
MF.addCallSiteInfo(MI, DAG->getCallSiteInfo(Node));
if (auto CalledGlobal = DAG->getCalledGlobal(Node))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5c586f7..f41b6eb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7843,20 +7843,43 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
}
- // Perform trivial constant folding.
- if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
- return SV;
+ if (N1.getOpcode() == ISD::POISON || N2.getOpcode() == ISD::POISON) {
+ switch (Opcode) {
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::PTRADD:
+ case ISD::SUB:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ case ISD::UMIN:
+ case ISD::OR:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::UMAX:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ // fold op(arg1, poison) -> poison, fold op(poison, arg2) -> poison.
+ return N2.getOpcode() == ISD::POISON ? N2 : N1;
+ }
+ }
// Canonicalize an UNDEF to the RHS, even over a constant.
- if (N1.isUndef()) {
+ if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() != ISD::UNDEF) {
if (TLI->isCommutativeBinOp(Opcode)) {
std::swap(N1, N2);
} else {
switch (Opcode) {
case ISD::PTRADD:
case ISD::SUB:
- // fold op(undef, arg2) -> undef, fold op(poison, arg2) ->poison.
- return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT);
+ // fold op(undef, non_undef_arg2) -> undef.
+ return N1;
case ISD::SIGN_EXTEND_INREG:
case ISD::UDIV:
case ISD::SDIV:
@@ -7864,18 +7887,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SREM:
case ISD::SSUBSAT:
case ISD::USUBSAT:
- // fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison.
- return N1.getOpcode() == ISD::POISON ? getPOISON(VT)
- : getConstant(0, DL, VT);
+ // fold op(undef, non_undef_arg2) -> 0.
+ return getConstant(0, DL, VT);
}
}
}
// Fold a bunch of operators when the RHS is undef.
- if (N2.isUndef()) {
+ if (N2.getOpcode() == ISD::UNDEF) {
switch (Opcode) {
case ISD::XOR:
- if (N1.isUndef())
+ if (N1.getOpcode() == ISD::UNDEF)
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return getConstant(0, DL, VT);
@@ -7883,29 +7905,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::ADD:
case ISD::PTRADD:
case ISD::SUB:
+ // fold op(arg1, undef) -> undef.
+ return N2;
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- // fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison.
- return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT);
+ // fold op(arg1, undef) -> poison.
+ return getPOISON(VT);
case ISD::MUL:
case ISD::AND:
case ISD::SSUBSAT:
case ISD::USUBSAT:
- // fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison.
- return N2.getOpcode() == ISD::POISON ? getPOISON(VT)
- : getConstant(0, DL, VT);
+ case ISD::UMIN:
+ // fold op(undef, undef) -> undef, fold op(arg1, undef) -> 0.
+ return N1.getOpcode() == ISD::UNDEF ? N2 : getConstant(0, DL, VT);
case ISD::OR:
case ISD::SADDSAT:
case ISD::UADDSAT:
- // fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison) ->
- // poison.
- return N2.getOpcode() == ISD::POISON ? getPOISON(VT)
- : getAllOnesConstant(DL, VT);
+ case ISD::UMAX:
+ // fold op(undef, undef) -> undef, fold op(arg1, undef) -> -1.
+ return N1.getOpcode() == ISD::UNDEF ? N2 : getAllOnesConstant(DL, VT);
+ case ISD::SMAX:
+ // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MAX_INT.
+ return N1.getOpcode() == ISD::UNDEF
+ ? N2
+ : getConstant(
+ APInt::getSignedMaxValue(VT.getScalarSizeInBits()), DL,
+ VT);
+ case ISD::SMIN:
+ // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MIN_INT.
+ return N1.getOpcode() == ISD::UNDEF
+ ? N2
+ : getConstant(
+ APInt::getSignedMinValue(VT.getScalarSizeInBits()), DL,
+ VT);
}
}
+ // Perform trivial constant folding.
+ if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
+ return SV;
+
// Memoize this node if possible.
SDNode *N;
SDVTList VTs = getVTList(VT);
@@ -12741,7 +12782,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
return Seen;
}
-/// isOperand - Return true if this node is an operand of N.
+/// Return true if the referenced return value is an operand of N.
bool SDValue::isOperandOf(const SDNode *N) const {
return is_contained(N->op_values(), *this);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1764910..48d6b99 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9471,7 +9471,7 @@ SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
ISD::SRL, DL, VT,
DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
DAG.getConstant(DeBruijn, DL, VT)),
- DAG.getConstant(ShiftAmt, DL, VT));
+ DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
SmallVector<uint8_t> Table(BitWidth, 0);
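For context on the CTTZTableLookup change, this is the classic 32-bit De Bruijn multiply-and-lookup count-trailing-zeros routine that the expansion generalizes; the multiplier and table here are the well-known published constants, not values taken from this diff.

    #include <cassert>
    #include <cstdint>

    static int cttz32(uint32_t V) {
      static const int Table[32] = {
          0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
          31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
      // Isolate the lowest set bit, multiply by the De Bruijn constant, and
      // use the top five bits of the product (the SRL in CTTZTableLookup)
      // as the table index. V must be non-zero.
      return Table[((V & (0u - V)) * 0x077CB531u) >> 27];
    }

    int main() {
      for (unsigned I = 0; I < 32; ++I)
        assert(cttz32(uint32_t(1) << I) == static_cast<int>(I));
      return 0;
    }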
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index a88c57f..5d720fb 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -604,12 +604,21 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
bool HasComputedGoto = false;
if (!TailBB.empty()) {
HasIndirectbr = TailBB.back().isIndirectBranch();
- HasComputedGoto = TailBB.terminatorIsComputedGoto();
+ HasComputedGoto = TailBB.terminatorIsComputedGotoWithSuccessors();
}
if (HasIndirectbr && PreRegAlloc)
MaxDuplicateCount = TailDupIndirectBranchSize;
+ // Allow higher limits when the block has computed gotos and we are running
+ // after register allocation. NB. This basically unfactors computed gotos that were
+ // factored early on in the compilation process to speed up edge based data
+ // flow. If we do not unfactor them again, it can seriously pessimize code
+ // with many computed jumps in the source code, such as interpreters.
+ // Therefore we do not restrict the computed gotos.
+ if (HasComputedGoto && !PreRegAlloc)
+ MaxDuplicateCount = std::max(MaxDuplicateCount, 10u);
+
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
@@ -663,12 +672,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// Duplicating a BB which has both multiple predecessors and successors will
// may cause huge amount of PHI nodes. If we want to remove this limitation,
// we have to address https://github.com/llvm/llvm-project/issues/78578.
- // NB. This basically unfactors computed gotos that were factored early on in
- // the compilation process to speed up edge based data flow. If we do not
- // unfactor them again, it can seriously pessimize code with many computed
- // jumps in the source code, such as interpreters. Therefore we do not
- // restrict the computed gotos.
- if (!HasComputedGoto && TailBB.pred_size() > TailDupPredSize &&
+ if (PreRegAlloc && TailBB.pred_size() > TailDupPredSize &&
TailBB.succ_size() > TailDupSuccSize) {
// If TailBB or any of its successors contains a phi, we may have to add a
// large number of additional phis with additional incoming values.
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 18d6bbc..705e046e 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1406,7 +1406,7 @@ void TargetInstrInfo::reassociateOps(
const MCInstrDesc &MCID, Register DestReg) {
return MachineInstrBuilder(
MF, MF.CreateMachineInstr(MCID, MIMD.getDL(), /*NoImpl=*/true))
- .setPCSections(MIMD.getPCSections())
+ .copyMIMetadata(MIMD)
.addReg(DestReg, RegState::Define);
};
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 725e951..e9172f4 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1060,27 +1060,27 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
auto &Context = getContext();
if (Kind.isMergeableConst4() && MergeableConst4Section)
- return Context.getELFSection(".rodata.cst4." + SectionSuffix,
+ return Context.getELFSection(".rodata.cst4." + SectionSuffix + ".",
ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_MERGE, 4);
if (Kind.isMergeableConst8() && MergeableConst8Section)
- return Context.getELFSection(".rodata.cst8." + SectionSuffix,
+ return Context.getELFSection(".rodata.cst8." + SectionSuffix + ".",
ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_MERGE, 8);
if (Kind.isMergeableConst16() && MergeableConst16Section)
- return Context.getELFSection(".rodata.cst16." + SectionSuffix,
+ return Context.getELFSection(".rodata.cst16." + SectionSuffix + ".",
ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_MERGE, 16);
if (Kind.isMergeableConst32() && MergeableConst32Section)
- return Context.getELFSection(".rodata.cst32." + SectionSuffix,
+ return Context.getELFSection(".rodata.cst32." + SectionSuffix + ".",
ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_MERGE, 32);
if (Kind.isReadOnly())
- return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC);
+ return Context.getELFSection(".rodata." + SectionSuffix + ".",
+ ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return Context.getELFSection(".data.rel.ro." + SectionSuffix,
+ return Context.getELFSection(".data.rel.ro." + SectionSuffix + ".",
ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_WRITE);
}