about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp 108
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp 23
-rw-r--r-- llvm/lib/CodeGen/MachineFunction.cpp 20
-rw-r--r-- llvm/lib/CodeGen/MachineScheduler.cpp 66
-rw-r--r-- llvm/lib/CodeGen/RegAllocBase.cpp 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 106
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 2
-rw-r--r-- llvm/lib/CodeGen/TailDuplicator.cpp 18
-rw-r--r-- llvm/lib/CodeGen/TargetInstrInfo.cpp 2
9 files changed, 295 insertions, 54 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 6166271..1641c3e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1654,6 +1654,88 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
*StackUsageStream << "static\n";
}
+/// Derives the numeric call graph type id of \p F from its !type metadata.
+///
+/// The first !type node that carries a generalized type-id string is used; the
+/// MD5 hash of that string is returned as a 64-bit ConstantInt. Returns nullptr
+/// when no such node is attached to \p F.
+static ConstantInt *extractNumericCGTypeId(const Function &F) {
+  SmallVector<MDNode *, 2> TypeNodes;
+  F.getMetadata(LLVMContext::MD_type, TypeNodes);
+  for (MDNode *TypeMD : TypeNodes) {
+    if (!TypeMD->hasGeneralizedMDString())
+      continue;
+
+    // Hash the string stored in operand 1 to obtain the numeric id.
+    MDString *GeneralizedId = cast<MDString>(TypeMD->getOperand(1));
+    const uint64_t NumericId = llvm::MD5Hash(GeneralizedId->getString());
+    IntegerType *Int64Ty = Type::getInt64Ty(F.getContext());
+    return ConstantInt::get(Int64Ty, NumericId);
+  }
+  return nullptr;
+}
+
+/// Emits this function's record into the call graph section.
+///
+/// Does nothing unless the EmitCallGraphSection target option is set. The
+/// record consists of, in order: the format version, the function entry PC,
+/// the function kind, the function's numeric type id (only when the function
+/// is an indirect target with a known type id), the number of recorded call
+/// site labels, and one (type id, call site PC) pair per recorded label.
+/// The labels stored in \p FuncInfo are cleared once they have been emitted.
+void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
+                                      FunctionInfo &FuncInfo) {
+  if (!MF.getTarget().Options.EmitCallGraphSection)
+    return;
+
+  // Redirect output to the call graph section that belongs to the current
+  // section; the previous section is restored by popSection() below.
+  MCSection *CGSection =
+      getObjFileLowering().getCallGraphSection(*getCurrentSection());
+  assert(CGSection && "null callgraph section");
+  OutStreamer->pushSection();
+  OutStreamer->switchSection(CGSection);
+
+  const unsigned PtrSize = TM.getProgramPointerSize();
+  // FIXME: FunctionKind takes a few values but emitted as a 64-bit value.
+  // Can be optimized to occupy 2 bits instead.
+  auto EmitKind = [this](FunctionInfo::FunctionKind Kind) {
+    OutStreamer->emitInt64(static_cast<uint64_t>(Kind));
+  };
+
+  // Format version number.
+  OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0);
+
+  // Function entry pc.
+  OutStreamer->emitSymbolValue(getFunctionBegin(), PtrSize);
+
+  // Treat the function as a possible indirect call target unless it has local
+  // linkage and its address is not taken (callback uses and assume-like calls
+  // are ignored when looking for address-taking uses).
+  const Function &F = MF.getFunction();
+  const bool MayBeIndirectTarget =
+      !F.hasLocalLinkage() ||
+      F.hasAddressTaken(nullptr,
+                        /*IgnoreCallbackUses=*/true,
+                        /*IgnoreAssumeLikeCalls=*/true,
+                        /*IgnoreLLVMUsed=*/false);
+
+  // Function kind, followed by the type id when it is known.
+  if (!MayBeIndirectTarget) {
+    EmitKind(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET);
+  } else if (const ConstantInt *SelfTypeId = extractNumericCGTypeId(F)) {
+    EmitKind(FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID);
+    OutStreamer->emitInt64(SelfTypeId->getZExtValue());
+  } else {
+    EmitKind(FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID);
+  }
+
+  // Call site labels: a count, then (type id, indirect call site pc) pairs.
+  auto &Labels = FuncInfo.CallSiteLabels;
+  OutStreamer->emitInt64(Labels.size());
+  for (const auto &[CalleeTypeId, CallSiteSym] : Labels) {
+    OutStreamer->emitInt64(CalleeTypeId);
+    OutStreamer->emitSymbolValue(CallSiteSym, PtrSize);
+  }
+  Labels.clear();
+
+  OutStreamer->popSection();
+}
+
void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
const MDNode &MD) {
MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
@@ -1784,6 +1866,23 @@ static StringRef getMIMnemonic(const MachineInstr &MI, MCStreamer &Streamer) {
return Name;
}
+/// Emits one temporary label per callee type id recorded for \p MI in
+/// \p CallSitesInfoMap and appends each (type id, label) pair to
+/// \p FuncInfo.CallSiteLabels. Instructions without an entry in the map are
+/// ignored.
+void AsmPrinter::emitIndirectCalleeLabels(
+    FunctionInfo &FuncInfo,
+    const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
+    const MachineInstr &MI) {
+  // Only indirect calls have type identifiers set.
+  auto InfoIt = CallSitesInfoMap.find(&MI);
+  if (InfoIt == CallSitesInfoMap.end())
+    return;
+
+  for (ConstantInt *TypeIdConst : InfoIt->second.CalleeTypeIds) {
+    MCSymbol *CallSiteSym = MF->getContext().createTempSymbol();
+    OutStreamer->emitLabel(CallSiteSym);
+    const uint64_t NumericTypeId = TypeIdConst->getZExtValue();
+    FuncInfo.CallSiteLabels.emplace_back(NumericTypeId, CallSiteSym);
+  }
+}
+
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::emitFunctionBody() {
@@ -1830,6 +1929,8 @@ void AsmPrinter::emitFunctionBody() {
MBBSectionRanges[MF->front().getSectionID()] =
MBBSectionRange{CurrentFnBegin, nullptr};
+ FunctionInfo FuncInfo;
+ const auto &CallSitesInfoMap = MF->getCallSitesInfo();
for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
@@ -1963,6 +2064,9 @@ void AsmPrinter::emitFunctionBody() {
break;
}
+ if (TM.Options.EmitCallGraphSection && MI.isCall())
+ emitIndirectCalleeLabels(FuncInfo, CallSitesInfoMap, MI);
+
// If there is a post-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPostInstrSymbol())
OutStreamer->emitLabel(S);
@@ -2142,6 +2246,9 @@ void AsmPrinter::emitFunctionBody() {
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
+ // Emit section containing call graph metadata.
+ emitCallGraphSection(*MF, FuncInfo);
+
// Emit .su file containing function stack size information.
emitStackUsage(*MF);
@@ -2841,6 +2948,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
F.hasFnAttribute("xray-instruction-threshold") ||
needFuncLabels(MF, *this) || NeedsLocalForSize ||
MF.getTarget().Options.EmitStackSizeSection ||
+ MF.getTarget().Options.EmitCallGraphSection ||
MF.getTarget().Options.BBAddrMap) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 416c56d..f16283b 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2769,6 +2769,29 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
return optimizeGatherScatterInst(II, II->getArgOperand(0));
case Intrinsic::masked_scatter:
return optimizeGatherScatterInst(II, II->getArgOperand(1));
+ case Intrinsic::masked_load:
+ // Treat v1X masked load as load X type.
+ if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
+ if (VT->getNumElements() == 1) {
+ Value *PtrVal = II->getArgOperand(0);
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+ return true;
+ }
+ }
+ return false;
+ case Intrinsic::masked_store:
+ // Treat v1X masked store as store X type.
+ if (auto *VT =
+ dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ if (VT->getNumElements() == 1) {
+ Value *PtrVal = II->getArgOperand(1);
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+ return true;
+ }
+ }
+ return false;
}
SmallVector<Value *, 2> PtrOps;
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 60d42e0..ec40f6a 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -698,6 +698,26 @@ bool MachineFunction::needsFrameMoves() const {
!F.getParent()->debug_compile_units().empty();
}
+/// Builds call site info for \p CB by collecting numeric callee type ids.
+///
+/// For an indirect call that carries !callee_type metadata, operand 1 of every
+/// node in the list is read as a type-id string, hashed with MD5, and stored in
+/// CalleeTypeIds as an unsigned 64-bit constant. Direct calls and calls without
+/// the metadata add nothing.
+MachineFunction::CallSiteInfo::CallSiteInfo(const CallBase &CB) {
+  // Numeric callee_type ids are only for indirect calls.
+  if (!CB.isIndirectCall())
+    return;
+
+  MDNode *CalleeTypes = CB.getMetadata(LLVMContext::MD_callee_type);
+  if (!CalleeTypes)
+    return;
+
+  IntegerType *Int64Ty = Type::getInt64Ty(CB.getContext());
+  for (const MDOperand &Entry : CalleeTypes->operands()) {
+    MDNode *TypeNode = cast<MDNode>(Entry);
+    MDString *GeneralizedId = cast<MDString>(TypeNode->getOperand(1));
+    // Compute numeric type id from generalized type id string
+    const uint64_t NumericId = MD5Hash(GeneralizedId->getString());
+    CalleeTypeIds.push_back(
+        ConstantInt::get(Int64Ty, NumericId, /*IsSigned=*/false));
+  }
+}
+
namespace llvm {
template<>
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 9d5c39c..c6fa8f4 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -3676,8 +3676,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
TopCand.SU = nullptr;
BotCand.SU = nullptr;
- TopCluster = nullptr;
- BotCluster = nullptr;
+ TopClusterID = InvalidClusterId;
+ BotClusterID = InvalidClusterId;
}
/// Initialize the per-region scheduling policy.
@@ -3988,10 +3988,14 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
@@ -4251,24 +4255,30 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
- TopCluster = DAG->getCluster(SU->ParentClusterIdx);
- LLVM_DEBUG(if (TopCluster) {
- dbgs() << " Top Cluster: ";
- for (auto *N : *TopCluster)
- dbgs() << N->NodeNum << '\t';
- dbgs() << '\n';
+ TopClusterID = SU->ParentClusterIdx;
+ LLVM_DEBUG({
+ if (TopClusterID != InvalidClusterId) {
+ ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
+ dbgs() << " Top Cluster: ";
+ for (auto *N : *TopCluster)
+ dbgs() << N->NodeNum << '\t';
+ dbgs() << '\n';
+ }
});
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
reschedulePhysReg(SU, true);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
- BotCluster = DAG->getCluster(SU->ParentClusterIdx);
- LLVM_DEBUG(if (BotCluster) {
- dbgs() << " Bot Cluster: ";
- for (auto *N : *BotCluster)
- dbgs() << N->NodeNum << '\t';
- dbgs() << '\n';
+ BotClusterID = SU->ParentClusterIdx;
+ LLVM_DEBUG({
+ if (BotClusterID != InvalidClusterId) {
+ ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
+ dbgs() << " Bot Cluster: ";
+ for (auto *N : *BotCluster)
+ dbgs() << N->NodeNum << '\t';
+ dbgs() << '\n';
+ }
});
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
@@ -4306,8 +4316,8 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
if (!Bot.HazardRec) {
Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
}
- TopCluster = nullptr;
- BotCluster = nullptr;
+ TopClusterID = InvalidClusterId;
+ BotClusterID = InvalidClusterId;
}
void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
@@ -4373,10 +4383,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
return TryCand.Reason != NoCand;
// Keep clustered nodes together.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
// Avoid critical resource consumption and balance the schedule.
@@ -4575,11 +4589,11 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
- TopCluster = DAG->getCluster(SU->ParentClusterIdx);
+ TopClusterID = SU->ParentClusterIdx;
Top.bumpNode(SU);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
- BotCluster = DAG->getCluster(SU->ParentClusterIdx);
+ BotClusterID = SU->ParentClusterIdx;
Bot.bumpNode(SU);
}
}
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index 69b9291..2400a1f 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -178,10 +178,8 @@ void RegAllocBase::cleanupFailedVReg(Register FailedReg, MCRegister PhysReg,
for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
++Aliases) {
for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
- if (MO.readsReg()) {
+ if (MO.readsReg())
MO.setIsUndef(true);
- LIS->removeAllRegUnitsForPhysReg(MO.getReg());
- }
}
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a43020e..11e869a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -331,6 +331,11 @@ namespace {
return CombineTo(N, To, 2, AddTo);
}
+    /// Convenience overload: forwards the elements of \p To (data pointer and
+    /// element count) to the pointer/count CombineTo overload.
+    SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
+                      bool AddTo = true) {
+      SDValue *Replacements = To->data();
+      const auto NumReplacements = To->size();
+      return CombineTo(N, Replacements, NumReplacements, AddTo);
+    }
+
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
@@ -541,6 +546,7 @@ namespace {
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitVECTOR_INTERLEAVE(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitSCALAR_TO_VECTOR(SDNode *N);
@@ -2021,6 +2027,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
@@ -4100,18 +4107,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
if (N1.hasOneUse() && hasUMin(VT)) {
SDValue Y;
- if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETULT)),
- m_Zero(), m_Deferred(Y))) ||
- sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETUGE)),
- m_Deferred(Y), m_Zero())) ||
- sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETULT)),
- m_Zero(), m_Deferred(Y))) ||
- sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETUGE)),
- m_Deferred(Y), m_Zero())))
+ auto MS0 = m_Specific(N0);
+ auto MVY = m_Value(Y);
+ auto MZ = m_Zero();
+ auto MCC1 = m_SpecificCondCode(ISD::SETULT);
+ auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
+
+ if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
+ sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
+ sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
+ sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
+
return DAG.getNode(ISD::UMIN, DL, VT, N0,
DAG.getNode(ISD::SUB, DL, VT, N0, Y));
}
@@ -10616,6 +10622,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getVScale(DL, VT, C0 << C1);
}
+ SDValue X;
+ APInt VS0;
+
+ // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
+ if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
+ N0->getFlags().hasNoUnsignedWrap());
+
+ SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
+ return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
+ }
+
// Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
APInt ShlVal;
if (N0.getOpcode() == ISD::STEP_VECTOR &&
@@ -25282,6 +25301,28 @@ static SDValue combineConcatVectorOfShuffleAndItsOperands(
return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
}
+/// Folds a CONCAT_VECTORS whose operands are all the same SPLAT_VECTOR value
+/// into a single SPLAT_VECTOR of the concatenated type. Returns an empty
+/// SDValue when the fold does not apply or is not permitted.
+static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
+                                           const TargetLowering &TLI,
+                                           bool LegalTypes,
+                                           bool LegalOperations) {
+  const EVT ResultVT = N->getValueType(0);
+
+  // Post-legalization we can only create wider SPLAT_VECTOR operations if both
+  // the type and the operation are legal. The Hexagon target has custom
+  // legalization for SPLAT_VECTOR that splits the operation into two parts and
+  // concatenates them. Therefore, custom lowering must also be rejected in
+  // order to avoid an infinite loop.
+  if (LegalTypes && !TLI.isTypeLegal(ResultVT))
+    return SDValue();
+  if (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, ResultVT))
+    return SDValue();
+
+  // Every operand has to be the same value, and that value has to be a
+  // SPLAT_VECTOR.
+  const SDValue FirstOp = N->getOperand(0);
+  const bool FirstIsSplat = FirstOp.getOpcode() == ISD::SPLAT_VECTOR;
+  if (!FirstIsSplat || !llvm::all_equal(N->op_values()))
+    return SDValue();
+
+  // Splat the original scalar directly at the wider type.
+  return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), ResultVT,
+                     FirstOp.getOperand(0));
+}
+
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
@@ -25405,6 +25446,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
+ if (SDValue V =
+ combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
+ return V;
+
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
// FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
@@ -25473,6 +25518,21 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+/// Combines a VECTOR_INTERLEAVE whose operands are all the same splat value:
+/// the results of \p N are replaced by its own operands. Returns an empty
+/// SDValue when the operands differ or the shared operand is not a splat.
+SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
+  // The fold needs identical operands, and that single operand must be a
+  // splat.
+  if (!llvm::all_equal(N->op_values()) || !DAG.isSplatValue(N->getOperand(0)))
+    return SDValue();
+
+  // interleave splat(X), splat(X).... --> splat(X), splat(X)....
+  auto Operands = N->op_values();
+  SmallVector<SDValue, 4> Replacements(Operands.begin(), Operands.end());
+  return CombineTo(N, &Replacements);
+}
+
// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
// if the subvector can be sourced for free.
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
@@ -28965,13 +29025,27 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
((N1C->isAllOnes() && CC == ISD::SETGT) ||
(N1C->isZero() && CC == ISD::SETLT)) &&
!TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
- SDValue ASR = DAG.getNode(
- ISD::SRA, DL, CmpOpVT, N0,
- DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
- return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
+ SDValue ASHR =
+ DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getShiftAmountConstant(
+ CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
+ return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
}
+ // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
+ if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
+ N2C->isOne() && N3C->isAllOnes() &&
+ !TLI.shouldAvoidTransformToShift(CmpOpVT,
+ CmpOpVT.getScalarSizeInBits() - 1)) {
+ SDValue ASHR =
+ DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getShiftAmountConstant(
+ CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
+ return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
+ DAG.getConstant(1, DL, VT));
+ }
+
if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
return S;
if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 02d1100..f41b6eb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12782,7 +12782,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
return Seen;
}
-/// isOperand - Return true if this node is an operand of N.
+/// Return true if the referenced return value is an operand of N.
bool SDValue::isOperandOf(const SDNode *N) const {
return is_contained(N->op_values(), *this);
}
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index a88c57f..5d720fb 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -604,12 +604,21 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
bool HasComputedGoto = false;
if (!TailBB.empty()) {
HasIndirectbr = TailBB.back().isIndirectBranch();
- HasComputedGoto = TailBB.terminatorIsComputedGoto();
+ HasComputedGoto = TailBB.terminatorIsComputedGotoWithSuccessors();
}
if (HasIndirectbr && PreRegAlloc)
MaxDuplicateCount = TailDupIndirectBranchSize;
+ // Allow higher limits when the block has computed-gotos and running after
+ // register allocation. NB. This basically unfactors computed gotos that were
+ // factored early on in the compilation process to speed up edge based data
+ // flow. If we do not unfactor them again, it can seriously pessimize code
+ // with many computed jumps in the source code, such as interpreters.
+ // Therefore we do not restrict the computed gotos.
+ if (HasComputedGoto && !PreRegAlloc)
+ MaxDuplicateCount = std::max(MaxDuplicateCount, 10u);
+
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
@@ -663,12 +672,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// Duplicating a BB which has both multiple predecessors and successors will
// may cause huge amount of PHI nodes. If we want to remove this limitation,
// we have to address https://github.com/llvm/llvm-project/issues/78578.
- // NB. This basically unfactors computed gotos that were factored early on in
- // the compilation process to speed up edge based data flow. If we do not
- // unfactor them again, it can seriously pessimize code with many computed
- // jumps in the source code, such as interpreters. Therefore we do not
- // restrict the computed gotos.
- if (!HasComputedGoto && TailBB.pred_size() > TailDupPredSize &&
+ if (PreRegAlloc && TailBB.pred_size() > TailDupPredSize &&
TailBB.succ_size() > TailDupSuccSize) {
// If TailBB or any of its successors contains a phi, we may have to add a
// large number of additional phis with additional incoming values.
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 18d6bbc..705e046e 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1406,7 +1406,7 @@ void TargetInstrInfo::reassociateOps(
const MCInstrDesc &MCID, Register DestReg) {
return MachineInstrBuilder(
MF, MF.CreateMachineInstr(MCID, MIMD.getDL(), /*NoImpl=*/true))
- .setPCSections(MIMD.getPCSections())
+ .copyMIMetadata(MIMD)
.addReg(DestReg, RegState::Define);
};