diff options
Diffstat (limited to 'llvm/lib/CodeGen')
20 files changed, 620 insertions, 99 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6166271..1641c3e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1654,6 +1654,88 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) { *StackUsageStream << "static\n"; } +/// Extracts a generalized numeric type identifier of a Function's type from +/// type metadata. Returns null if metadata cannot be found. +static ConstantInt *extractNumericCGTypeId(const Function &F) { + SmallVector<MDNode *, 2> Types; + F.getMetadata(LLVMContext::MD_type, Types); + for (const auto &Type : Types) { + if (Type->hasGeneralizedMDString()) { + MDString *MDGeneralizedTypeId = cast<MDString>(Type->getOperand(1)); + uint64_t TypeIdVal = llvm::MD5Hash(MDGeneralizedTypeId->getString()); + IntegerType *Int64Ty = Type::getInt64Ty(F.getContext()); + return ConstantInt::get(Int64Ty, TypeIdVal); + } + } + return nullptr; +} + +/// Emits .callgraph section. +void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, + FunctionInfo &FuncInfo) { + if (!MF.getTarget().Options.EmitCallGraphSection) + return; + + // Switch to the call graph section for the function + MCSection *FuncCGSection = + getObjFileLowering().getCallGraphSection(*getCurrentSection()); + assert(FuncCGSection && "null callgraph section"); + OutStreamer->pushSection(); + OutStreamer->switchSection(FuncCGSection); + + // Emit format version number. + OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0); + + // Emit function's self information, which is composed of: + // 1) FunctionEntryPc + // 2) FunctionKind: Whether the function is indirect target, and if so, + // whether its type id is known. + // 3) FunctionTypeId: Emit only when the function is an indirect target + // and its type id is known. + + // Emit function entry pc. + const MCSymbol *FunctionSymbol = getFunctionBegin(); + OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + + // If this function has external linkage or has its address taken and + // it is not a callback, then anything could call it. + const Function &F = MF.getFunction(); + bool IsIndirectTarget = + !F.hasLocalLinkage() || F.hasAddressTaken(nullptr, + /*IgnoreCallbackUses=*/true, + /*IgnoreAssumeLikeCalls=*/true, + /*IgnoreLLVMUsed=*/false); + + // FIXME: FunctionKind takes a few values but emitted as a 64-bit value. + // Can be optimized to occupy 2 bits instead. + // Emit function kind, and type id if available. + if (!IsIndirectTarget) { + OutStreamer->emitInt64( + static_cast<uint64_t>(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET)); + } else { + if (const auto *TypeId = extractNumericCGTypeId(F)) { + OutStreamer->emitInt64(static_cast<uint64_t>( + FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID)); + OutStreamer->emitInt64(TypeId->getZExtValue()); + } else { + OutStreamer->emitInt64(static_cast<uint64_t>( + FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID)); + } + } + + // Emit callsite labels, where each element is a pair of type id and + // indirect callsite pc. + const auto &CallSiteLabels = FuncInfo.CallSiteLabels; + OutStreamer->emitInt64(CallSiteLabels.size()); + for (const auto &[TypeId, Label] : CallSiteLabels) { + OutStreamer->emitInt64(TypeId); + OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize()); + } + FuncInfo.CallSiteLabels.clear(); + + OutStreamer->popSection(); +} + void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF, const MDNode &MD) { MCSymbol *S = MF.getContext().createTempSymbol("pcsection"); @@ -1784,6 +1866,23 @@ static StringRef getMIMnemonic(const MachineInstr &MI, MCStreamer &Streamer) { return Name; } +void AsmPrinter::emitIndirectCalleeLabels( + FunctionInfo &FuncInfo, + const MachineFunction::CallSiteInfoMap &CallSitesInfoMap, + const MachineInstr &MI) { + // Only indirect calls have type identifiers set. + const auto &CallSiteInfo = CallSitesInfoMap.find(&MI); + if (CallSiteInfo == CallSitesInfoMap.end()) + return; + + for (ConstantInt *CalleeTypeId : CallSiteInfo->second.CalleeTypeIds) { + MCSymbol *S = MF->getContext().createTempSymbol(); + OutStreamer->emitLabel(S); + uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue(); + FuncInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S); + } +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::emitFunctionBody() { @@ -1830,6 +1929,8 @@ void AsmPrinter::emitFunctionBody() { MBBSectionRanges[MF->front().getSectionID()] = MBBSectionRange{CurrentFnBegin, nullptr}; + FunctionInfo FuncInfo; + const auto &CallSitesInfoMap = MF->getCallSitesInfo(); for (auto &MBB : *MF) { // Print a label for the basic block. emitBasicBlockStart(MBB); @@ -1963,6 +2064,9 @@ void AsmPrinter::emitFunctionBody() { break; } + if (TM.Options.EmitCallGraphSection && MI.isCall()) + emitIndirectCalleeLabels(FuncInfo, CallSitesInfoMap, MI); + // If there is a post-instruction symbol, emit a label for it here. if (MCSymbol *S = MI.getPostInstrSymbol()) OutStreamer->emitLabel(S); @@ -2142,6 +2246,9 @@ void AsmPrinter::emitFunctionBody() { // Emit section containing stack size metadata. emitStackSizeSection(*MF); + // Emit section containing call graph metadata. + emitCallGraphSection(*MF, FuncInfo); + // Emit .su file containing function stack size information. emitStackUsage(*MF); @@ -2841,6 +2948,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF, *this) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection || + MF.getTarget().Options.EmitCallGraphSection || MF.getTarget().Options.BBAddrMap) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 416c56d..f16283b 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2769,6 +2769,29 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { return optimizeGatherScatterInst(II, II->getArgOperand(0)); case Intrinsic::masked_scatter: return optimizeGatherScatterInst(II, II->getArgOperand(1)); + case Intrinsic::masked_load: + // Treat v1X masked load as load X type. + if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) { + if (VT->getNumElements() == 1) { + Value *PtrVal = II->getArgOperand(0); + unsigned AS = PtrVal->getType()->getPointerAddressSpace(); + if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS)) + return true; + } + } + return false; + case Intrinsic::masked_store: + // Treat v1X masked store as store X type. + if (auto *VT = + dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) { + if (VT->getNumElements() == 1) { + Value *PtrVal = II->getArgOperand(1); + unsigned AS = PtrVal->getType()->getPointerAddressSpace(); + if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS)) + return true; + } + } + return false; } SmallVector<Value *, 2> PtrOps; diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 9aa8deb..27df7e3 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -211,7 +211,9 @@ MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0, MachineInstrBuilder MachineIRBuilder::buildObjectPtrOffset(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1) { - return buildPtrAdd(Res, Op0, Op1, MachineInstr::MIFlag::NoUWrap); + return buildPtrAdd(Res, Op0, Op1, + MachineInstr::MIFlag::NoUWrap | + MachineInstr::MIFlag::InBounds); } std::optional<MachineInstrBuilder> @@ -234,7 +236,8 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, std::optional<MachineInstrBuilder> MachineIRBuilder::materializeObjectPtrOffset( Register &Res, Register Op0, const LLT ValueTy, uint64_t Value) { return materializePtrAdd(Res, Op0, ValueTy, Value, - MachineInstr::MIFlag::NoUWrap); + MachineInstr::MIFlag::NoUWrap | + MachineInstr::MIFlag::InBounds); } MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 193df1f..8b72c29 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -217,6 +217,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("nneg", MIToken::kw_nneg) .Case("disjoint", MIToken::kw_disjoint) .Case("samesign", MIToken::kw_samesign) + .Case("inbounds", MIToken::kw_inbounds) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 54142ac..0627f17 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -78,6 +78,7 @@ struct MIToken { kw_nneg, kw_disjoint, kw_samesign, + kw_inbounds, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 807d59c..6a464d9 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1477,7 +1477,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_nneg) || Token.is(MIToken::kw_disjoint) || Token.is(MIToken::kw_nusw) || - Token.is(MIToken::kw_samesign)) { + Token.is(MIToken::kw_samesign) || + Token.is(MIToken::kw_inbounds)) { // clang-format on // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) @@ -1518,6 +1519,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NoUSWrap; if (Token.is(MIToken::kw_samesign)) Flags |= MachineInstr::SameSign; + if (Token.is(MIToken::kw_inbounds)) + Flags |= MachineInstr::InBounds; lex(); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index ad7835a..ce1834a 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -820,6 +820,8 @@ static void printMI(raw_ostream &OS, MFPrintState &State, OS << "nusw "; if (MI.getFlag(MachineInstr::SameSign)) OS << "samesign "; + if (MI.getFlag(MachineInstr::InBounds)) + OS << "inbounds "; // NOTE: Please add new MIFlags also to the MI_FLAGS_STR in // llvm/utils/update_mir_test_checks.py. diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 60d42e0..ec40f6a 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -698,6 +698,26 @@ bool MachineFunction::needsFrameMoves() const { !F.getParent()->debug_compile_units().empty(); } +MachineFunction::CallSiteInfo::CallSiteInfo(const CallBase &CB) { + // Numeric callee_type ids are only for indirect calls. + if (!CB.isIndirectCall()) + return; + + MDNode *CalleeTypeList = CB.getMetadata(LLVMContext::MD_callee_type); + if (!CalleeTypeList) + return; + + for (const MDOperand &Op : CalleeTypeList->operands()) { + MDNode *TypeMD = cast<MDNode>(Op); + MDString *TypeIdStr = cast<MDString>(TypeMD->getOperand(1)); + // Compute numeric type id from generalized type id string + uint64_t TypeIdVal = MD5Hash(TypeIdStr->getString()); + IntegerType *Int64Ty = Type::getInt64Ty(CB.getContext()); + CalleeTypeIds.push_back( + ConstantInt::get(Int64Ty, TypeIdVal, /*IsSigned=*/false)); + } +} + namespace llvm { template<> diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index da3665b..79047f7 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -585,6 +585,8 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::NoUSWrap; if (GEP->hasNoUnsignedWrap()) MIFlags |= MachineInstr::MIFlag::NoUWrap; + if (GEP->isInBounds()) + MIFlags |= MachineInstr::MIFlag::InBounds; } // Copy the nonneg flag. @@ -1860,8 +1862,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nneg "; if (getFlag(MachineInstr::Disjoint)) OS << "disjoint "; + if (getFlag(MachineInstr::NoUSWrap)) + OS << "nusw "; if (getFlag(MachineInstr::SameSign)) OS << "samesign "; + if (getFlag(MachineInstr::InBounds)) + OS << "inbounds "; // Print the opcode name. if (TII) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 9d5c39c..c6fa8f4 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3676,8 +3676,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { TopCand.SU = nullptr; BotCand.SU = nullptr; - TopCluster = nullptr; - BotCluster = nullptr; + TopClusterID = InvalidClusterId; + BotClusterID = InvalidClusterId; } /// Initialize the per-region scheduling policy. @@ -3988,10 +3988,14 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand, // This is a best effort to set things up for a post-RA pass. Optimizations // like generating loads of multiple registers should ideally be done within // the scheduler pass by combining the loads during DAG postprocessing. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; @@ -4251,24 +4255,30 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); - TopCluster = DAG->getCluster(SU->ParentClusterIdx); - LLVM_DEBUG(if (TopCluster) { - dbgs() << " Top Cluster: "; - for (auto *N : *TopCluster) - dbgs() << N->NodeNum << '\t'; - dbgs() << '\n'; + TopClusterID = SU->ParentClusterIdx; + LLVM_DEBUG({ + if (TopClusterID != InvalidClusterId) { + ClusterInfo *TopCluster = DAG->getCluster(TopClusterID); + dbgs() << " Top Cluster: "; + for (auto *N : *TopCluster) + dbgs() << N->NodeNum << '\t'; + dbgs() << '\n'; + } }); Top.bumpNode(SU); if (SU->hasPhysRegUses) reschedulePhysReg(SU, true); } else { SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); - BotCluster = DAG->getCluster(SU->ParentClusterIdx); - LLVM_DEBUG(if (BotCluster) { - dbgs() << " Bot Cluster: "; - for (auto *N : *BotCluster) - dbgs() << N->NodeNum << '\t'; - dbgs() << '\n'; + BotClusterID = SU->ParentClusterIdx; + LLVM_DEBUG({ + if (BotClusterID != InvalidClusterId) { + ClusterInfo *BotCluster = DAG->getCluster(BotClusterID); + dbgs() << " Bot Cluster: "; + for (auto *N : *BotCluster) + dbgs() << N->NodeNum << '\t'; + dbgs() << '\n'; + } }); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) @@ -4306,8 +4316,8 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { if (!Bot.HazardRec) { Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); } - TopCluster = nullptr; - BotCluster = nullptr; + TopClusterID = InvalidClusterId; + BotClusterID = InvalidClusterId; } void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, @@ -4373,10 +4383,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, return TryCand.Reason != NoCand; // Keep clustered nodes together. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. @@ -4575,11 +4589,11 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); - TopCluster = DAG->getCluster(SU->ParentClusterIdx); + TopClusterID = SU->ParentClusterIdx; Top.bumpNode(SU); } else { SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); - BotCluster = DAG->getCluster(SU->ParentClusterIdx); + BotClusterID = SU->ParentClusterIdx; Bot.bumpNode(SU); } } diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 0f742c4..21bf052 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -423,7 +423,7 @@ void ModuloScheduleExpander::generateExistingPhis( // potentially define two values. unsigned MaxPhis = PrologStage + 2; if (!InKernel && (int)PrologStage <= LoopValStage) - MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); + MaxPhis = std::max((int)MaxPhis - LoopValStage, 1); unsigned NumPhis = std::min(NumStages, MaxPhis); Register NewReg; diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 69b9291..2400a1f 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -178,10 +178,8 @@ void RegAllocBase::cleanupFailedVReg(Register FailedReg, MCRegister PhysReg, for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid(); ++Aliases) { for (MachineOperand &MO : MRI->reg_operands(*Aliases)) { - if (MO.readsReg()) { + if (MO.readsReg()) MO.setIsUndef(true); - LIS->removeAllRegUnitsForPhysReg(MO.getReg()); - } } } } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 2d7987a..7ede564 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -306,7 +306,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); + /// + /// If \p SubregToRegSrcInst is not empty, we are coalescing a + /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an + /// implicit-def of DstReg on instructions that define SrcReg. + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, + ArrayRef<MachineInstr *> SubregToRegSrcInst = {}); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1443,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // CopyMI may have implicit operands, save them so that we can transfer them // over to the newly materialized instruction after CopyMI is removed. + LaneBitmask NewMIImplicitOpsMask; SmallVector<MachineOperand, 4> ImplicitOps; ImplicitOps.reserve(CopyMI->getNumOperands() - CopyMI->getDesc().getNumOperands()); @@ -1457,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && "unexpected implicit virtual register def"); ImplicitOps.push_back(MO); + if (MO.isDef() && MO.getReg().isVirtual() && + MRI->shouldTrackSubRegLiveness(DstReg)) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } @@ -1499,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { assert(MO.getReg() == NewMI.getOperand(0).getReg()); - // We're only expecting another def of the main output, so the range - // should get updated with the regular output range. - // - // FIXME: The range updating below probably needs updating to look at - // the super register if subranges are tracked. - assert(!MRI->shouldTrackSubRegLiveness(DstReg) && - "subrange update for implicit-def of super register may not be " - "properly handled"); + // If lanemasks need to be tracked, compile the lanemask of the NewMI + // implicit def operands to avoid subranges for the super-regs from + // being removed by code later on in this function. + if (MRI->shouldTrackSubRegLiveness(MO.getReg())) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } } @@ -1606,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &SR : DstInt.subranges()) { - if ((SR.LaneMask & DstMask).none()) { + if ((SR.LaneMask & DstMask).none() && + (SR.LaneMask & NewMIImplicitOpsMask).none()) { LLVM_DEBUG(dbgs() << "Removing undefined SubRange " << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); @@ -1870,11 +1877,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } } -void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx) { +void RegisterCoalescer::updateRegDefsUses( + Register SrcReg, Register DstReg, unsigned SubIdx, + ArrayRef<MachineInstr *> SubregToRegSrcInsts) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); + // Coalescing a COPY may expose reads of 'undef' subregisters. + // If so, then explicitly propagate 'undef' to those operands. if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { for (MachineOperand &MO : MRI->reg_operands(DstReg)) { if (MO.isUndef()) @@ -1891,6 +1901,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, } } + // If DstInt already has a subrange for the unused lanes, then we shouldn't + // create duplicate subranges when we update the interval for unused lanes. + LaneBitmask DstIntLaneMask; + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + for (LiveInterval::SubRange &SR : DstInt->subranges()) + DstIntLaneMask |= SR.LaneMask; + } + + // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'. SmallPtrSet<MachineInstr *, 8> Visited; for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); @@ -1914,6 +1933,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); + bool RequiresImplicitRedef = false; + if (!SubregToRegSrcInsts.empty()) { + // We can only add an implicit-def and undef if the sub registers match, + // e.g. + // %0:gr32 = INSTX + // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined + // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32 + // + // This cannot be transformed into: + // %1.sub32:gr64 = INSTX + // undef %1.sub8:gr64 = INSTY , implicit-def %1 + // + // Because that would thrash the top 24 bits of %1.sub32. + if (is_contained(SubregToRegSrcInsts, UseMI) && + all_of(UseMI->defs(), + [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool { + if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef()) + return true; + return SubIdx == MO.getSubReg(); + })) { + // Add implicit-def of super-register to express that the whole + // register is defined by the instruction. + MachineInstrBuilder MIB(*MF, UseMI); + MIB.addReg(DstReg, RegState::ImplicitDefine); + RequiresImplicitRedef = true; + } + + // If the coalesed instruction doesn't fully define the register, we need + // to preserve the original super register liveness for SUBREG_TO_REG. + // + // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, + // but it introduces liveness for other subregisters. Downstream users may + // have been relying on those bits, so we need to ensure their liveness is + // captured with a def of other lanes. + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + // First check if there is sufficient granularity in terms of subranges. + LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = DstMask & ~UsedLanes; + if ((UnusedLanes & ~DstIntLaneMask).any()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt); + DstIntLaneMask |= UnusedLanes; + } + + // After duplicating the live ranges for the low/hi bits, we + // need to update the subranges of the DstReg interval such that + // for a case like this: + // + // entry: + // 16B %1:gpr32 = INSTRUCTION (<=> UseMI) + // : + // if.then: + // 32B %1:gpr32 = MOVIMM32 .. + // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32 + // + // Only the MOVIMM32 require a def of the top lanes and any intervals + // for the top 32-bits of the def at 16B should be removed. + for (LiveInterval::SubRange &SR : DstInt->subranges()) { + if (!Writes || RequiresImplicitRedef || + (SR.LaneMask & UnusedLanes).none()) + continue; + + assert((SR.LaneMask & UnusedLanes) == SR.LaneMask && + "Unexpected lanemask. Subrange needs finer granularity"); + + SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false); + auto SegmentI = SR.find(UseIdx); + if (SegmentI != SR.end()) + SR.removeSegment(SegmentI, true); + } + } + } + // Replace SrcReg with DstReg in all UseMI operands. for (unsigned Op : Ops) { MachineOperand &MO = UseMI->getOperand(Op); @@ -1922,7 +2015,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // turn a full def into a read-modify-write sub-register def and vice // versa. if (SubIdx && MO.isDef()) - MO.setIsUndef(!Reads); + MO.setIsUndef(!Reads || RequiresImplicitRedef); // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. @@ -2025,6 +2118,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, LIS->shrinkToUses(&LI); } +/// For a given use of value \p Idx, it returns the def in the current block, +/// or otherwise all possible defs in preceding blocks. +static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks, + SmallVector<MachineInstr *> &Instrs, + LiveIntervals *LIS, LiveInterval &SrcInt, + MachineBasicBlock *MBB, VNInfo *Idx) { + if (!Idx->isPHIDef()) { + MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def); + assert(Def && "Unable to find a def for SUBREG_TO_REG source operand"); + Instrs.push_back(Def); + return true; + } + + bool Any = false; + if (VisitedBlocks.count(MBB)) + return false; + VisitedBlocks.insert(MBB); + for (MachineBasicBlock *Pred : MBB->predecessors()) { + Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred, + SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred))); + } + return Any; +} + bool RegisterCoalescer::joinCopy( MachineInstr *CopyMI, bool &Again, SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) { @@ -2156,6 +2273,35 @@ bool RegisterCoalescer::joinCopy( }); } + SmallVector<MachineInstr *> SubregToRegSrcInsts; + if (CopyMI->isSubregToReg()) { + // For the case where the copy instruction is a SUBREG_TO_REG, e.g. + // + // %0:gpr32 = movimm32 .. + // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32 + // ... + // %0:gpr32 = COPY <something> + // + // After joining liveranges, the original `movimm32` will need an + // implicit-def to make it explicit that the entire register is written, + // i.e. + // + // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0 + // ... + // undef %0.sub32:gpr64 = COPY <something> // Note that this does not + // // require an implicit-def, + // // because it has nothing to + // // do with the SUBREG_TO_REG. + LiveInterval &SrcInt = + LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI); + SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks; + if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt, + CopyMI->getParent(), + SrcInt.Query(SubregToRegSlotIdx).valueIn())) + llvm_unreachable("SUBREG_TO_REG src requires a def"); + } + ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2225,9 +2371,12 @@ bool RegisterCoalescer::joinCopy( // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. - if (CP.getDstIdx()) + if (CP.getDstIdx()) { + assert(SubregToRegSrcInsts.empty() && "can this happen?"); updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + } + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), + SubregToRegSrcInsts); // Shrink subregister ranges if necessary. if (ShrinkMask.any()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 20b96f5..5989c1d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -331,6 +331,11 @@ namespace { return CombineTo(N, To, 2, AddTo); } + SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To, + bool AddTo = true) { + return CombineTo(N, To->data(), To->size(), AddTo); + } + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); private: @@ -541,6 +546,7 @@ namespace { SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_INTERLEAVE(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitSCALAR_TO_VECTOR(SDNode *N); @@ -2021,6 +2027,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); @@ -4100,18 +4107,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y)) if (N1.hasOneUse() && hasUMin(VT)) { SDValue Y; - if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETULT)), - m_Zero(), m_Deferred(Y))) || - sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETUGE)), - m_Deferred(Y), m_Zero())) || - sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETULT)), - m_Zero(), m_Deferred(Y))) || - sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETUGE)), - m_Deferred(Y), m_Zero()))) + auto MS0 = m_Specific(N0); + auto MVY = m_Value(Y); + auto MZ = m_Zero(); + auto MCC1 = m_SpecificCondCode(ISD::SETULT); + auto MCC2 = m_SpecificCondCode(ISD::SETUGE); + + if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) || + sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) || + sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) || + sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ))) + return DAG.getNode(ISD::UMIN, DL, VT, N0, DAG.getNode(ISD::SUB, DL, VT, N0, Y)); } @@ -10616,6 +10622,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getVScale(DL, VT, C0 << C1); } + SDValue X; + APInt VS0; + + // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1)) + if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) { + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() && + N0->getFlags().hasNoUnsignedWrap()); + + SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue()); + return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags); + } + // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)). APInt ShlVal; if (N0.getOpcode() == ISD::STEP_VECTOR && @@ -25282,6 +25301,28 @@ static SDValue combineConcatVectorOfShuffleAndItsOperands( return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask); } +static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalTypes, + bool LegalOperations) { + EVT VT = N->getValueType(0); + + // Post-legalization we can only create wider SPLAT_VECTOR operations if both + // the type and operation is legal. The Hexagon target has custom + // legalization for SPLAT_VECTOR that splits the operation into two parts and + // concatenates them. Therefore, custom lowering must also be rejected in + // order to avoid an infinite loop. + if ((LegalTypes && !TLI.isTypeLegal(VT)) || + (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR) + return SDValue(); + + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) @@ -25405,6 +25446,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(VT, SDLoc(N), Opnds); } + if (SDValue V = + combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations)) + return V; + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...). if (SDValue V = combineConcatVectorOfScalars(N, DAG)) @@ -25473,6 +25518,21 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) { + // Check to see if all operands are identical. + if (!llvm::all_equal(N->op_values())) + return SDValue(); + + // Check to see if the identical operand is a splat. + if (!DAG.isSplatValue(N->getOperand(0))) + return SDValue(); + + // interleave splat(X), splat(X).... --> splat(X), splat(X).... + SmallVector<SDValue, 4> Ops; + Ops.append(N->op_values().begin(), N->op_values().end()); + return CombineTo(N, &Ops); +} + // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find // if the subvector can be sourced for free. static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) { @@ -28982,13 +29042,100 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, return SDValue(); } +static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, + const TargetLowering &TLI) { + // Match a pattern such as: + // (X | (X >> C0) | (X >> C1) | ...) & Mask + // This extracts contiguous parts of X and ORs them together before comparing. + // We can optimize this so that we directly check (X & SomeMask) instead, + // eliminating the shifts. + + EVT VT = Root.getValueType(); + + // TODO: Support vectors? + if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND) + return SDValue(); + + SDValue N0 = Root.getOperand(0); + SDValue N1 = Root.getOperand(1); + + if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1)) + return SDValue(); + + APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal(); + + SDValue Src; + const auto IsSrc = [&](SDValue V) { + if (!Src) { + Src = V; + return true; + } + + return Src == V; + }; + + SmallVector<SDValue> Worklist = {N0}; + APInt PartsMask(VT.getSizeInBits(), 0); + while (!Worklist.empty()) { + SDValue V = Worklist.pop_back_val(); + if (!V.hasOneUse() && (Src && Src != V)) + return SDValue(); + + if (V.getOpcode() == ISD::OR) { + Worklist.push_back(V.getOperand(0)); + Worklist.push_back(V.getOperand(1)); + continue; + } + + if (V.getOpcode() == ISD::SRL) { + SDValue ShiftSrc = V.getOperand(0); + SDValue ShiftAmt = V.getOperand(1); + + if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt)) + return SDValue(); + + auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal(); + if (ShiftAmtVal > RootMask.getBitWidth()) + return SDValue(); + + PartsMask |= (RootMask << ShiftAmtVal); + continue; + } + + if (IsSrc(V)) { + PartsMask |= RootMask; + continue; + } + + return SDValue(); + } + + if (!Src) + return SDValue(); + + SDLoc DL(Root); + return DAG.getNode(ISD::AND, DL, VT, + {Src, DAG.getConstant(PartsMask, DL, VT)}); +} + /// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); - return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); + if (SDValue C = + TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL)) + return C; + + if (ISD::isIntEqualitySetCC(Cond) && N0.getOpcode() == ISD::AND && + isNullConstant(N1)) { + + if (SDValue Res = matchMergedBFX(N0, DAG, TLI)) + return DAG.getSetCC(DL, VT, Res, N1, Cond); + } + + return SDValue(); } /// Given an ISD::SDIV node expressing a divide by constant, return diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 6a2e782..31e7855 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -888,7 +888,8 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } if (MI->isCandidateForAdditionalCallInfo()) { - if (DAG->getTarget().Options.EmitCallSiteInfo) + if (DAG->getTarget().Options.EmitCallSiteInfo || + DAG->getTarget().Options.EmitCallGraphSection) MF.addCallSiteInfo(MI, DAG->getCallSiteInfo(Node)); if (auto CalledGlobal = DAG->getCalledGlobal(Node)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5c586f7..f41b6eb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7843,20 +7843,43 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } - // Perform trivial constant folding. - if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) - return SV; + if (N1.getOpcode() == ISD::POISON || N2.getOpcode() == ISD::POISON) { + switch (Opcode) { + case ISD::XOR: + case ISD::ADD: + case ISD::PTRADD: + case ISD::SUB: + case ISD::SIGN_EXTEND_INREG: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::MUL: + case ISD::AND: + case ISD::SSUBSAT: + case ISD::USUBSAT: + case ISD::UMIN: + case ISD::OR: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::UMAX: + case ISD::SMAX: + case ISD::SMIN: + // fold op(arg1, poison) -> poison, fold op(poison, arg2) -> poison. + return N2.getOpcode() == ISD::POISON ? N2 : N1; + } + } // Canonicalize an UNDEF to the RHS, even over a constant. - if (N1.isUndef()) { + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() != ISD::UNDEF) { if (TLI->isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { switch (Opcode) { case ISD::PTRADD: case ISD::SUB: - // fold op(undef, arg2) -> undef, fold op(poison, arg2) ->poison. - return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); + // fold op(undef, non_undef_arg2) -> undef. + return N1; case ISD::SIGN_EXTEND_INREG: case ISD::UDIV: case ISD::SDIV: @@ -7864,18 +7887,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SREM: case ISD::SSUBSAT: case ISD::USUBSAT: - // fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison. - return N1.getOpcode() == ISD::POISON ? getPOISON(VT) - : getConstant(0, DL, VT); + // fold op(undef, non_undef_arg2) -> 0. + return getConstant(0, DL, VT); } } } // Fold a bunch of operators when the RHS is undef. - if (N2.isUndef()) { + if (N2.getOpcode() == ISD::UNDEF) { switch (Opcode) { case ISD::XOR: - if (N1.isUndef()) + if (N1.getOpcode() == ISD::UNDEF) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). return getConstant(0, DL, VT); @@ -7883,29 +7905,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::ADD: case ISD::PTRADD: case ISD::SUB: + // fold op(arg1, undef) -> undef. + return N2; case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: - // fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); + // fold op(arg1, undef) -> poison. + return getPOISON(VT); case ISD::MUL: case ISD::AND: case ISD::SSUBSAT: case ISD::USUBSAT: - // fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) - : getConstant(0, DL, VT); + case ISD::UMIN: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> 0. + return N1.getOpcode() == ISD::UNDEF ? N2 : getConstant(0, DL, VT); case ISD::OR: case ISD::SADDSAT: case ISD::UADDSAT: - // fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison) -> - // poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) - : getAllOnesConstant(DL, VT); + case ISD::UMAX: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> -1. + return N1.getOpcode() == ISD::UNDEF ? N2 : getAllOnesConstant(DL, VT); + case ISD::SMAX: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MAX_INT. + return N1.getOpcode() == ISD::UNDEF + ? N2 + : getConstant( + APInt::getSignedMaxValue(VT.getScalarSizeInBits()), DL, + VT); + case ISD::SMIN: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MIN_INT. + return N1.getOpcode() == ISD::UNDEF + ? N2 + : getConstant( + APInt::getSignedMinValue(VT.getScalarSizeInBits()), DL, + VT); } } + // Perform trivial constant folding. + if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) + return SV; + // Memoize this node if possible. SDNode *N; SDVTList VTs = getVTList(VT); @@ -12741,7 +12782,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { return Seen; } -/// isOperand - Return true if this node is an operand of N. +/// Return true if the referenced return value is an operand of N. bool SDValue::isOperandOf(const SDNode *N) const { return is_contained(N->op_values(), *this); } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1764910..48d6b99 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9471,7 +9471,7 @@ SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG, ISD::SRL, DL, VT, DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg), DAG.getConstant(DeBruijn, DL, VT)), - DAG.getConstant(ShiftAmt, DL, VT)); + DAG.getShiftAmountConstant(ShiftAmt, VT, DL)); Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD)); SmallVector<uint8_t> Table(BitWidth, 0); diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index a88c57f..5d720fb 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -604,12 +604,21 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, bool HasComputedGoto = false; if (!TailBB.empty()) { HasIndirectbr = TailBB.back().isIndirectBranch(); - HasComputedGoto = TailBB.terminatorIsComputedGoto(); + HasComputedGoto = TailBB.terminatorIsComputedGotoWithSuccessors(); } if (HasIndirectbr && PreRegAlloc) MaxDuplicateCount = TailDupIndirectBranchSize; + // Allow higher limits when the block has computed-gotos and running after + // register allocation. NB. This basically unfactors computed gotos that were + // factored early on in the compilation process to speed up edge based data + // flow. If we do not unfactor them again, it can seriously pessimize code + // with many computed jumps in the source code, such as interpreters. + // Therefore we do not restrict the computed gotos. + if (HasComputedGoto && !PreRegAlloc) + MaxDuplicateCount = std::max(MaxDuplicateCount, 10u); + // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; @@ -663,12 +672,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // Duplicating a BB which has both multiple predecessors and successors will // may cause huge amount of PHI nodes. If we want to remove this limitation, // we have to address https://github.com/llvm/llvm-project/issues/78578. - // NB. This basically unfactors computed gotos that were factored early on in - // the compilation process to speed up edge based data flow. If we do not - // unfactor them again, it can seriously pessimize code with many computed - // jumps in the source code, such as interpreters. Therefore we do not - // restrict the computed gotos. - if (!HasComputedGoto && TailBB.pred_size() > TailDupPredSize && + if (PreRegAlloc && TailBB.pred_size() > TailDupPredSize && TailBB.succ_size() > TailDupSuccSize) { // If TailBB or any of its successors contains a phi, we may have to add a // large number of additional phis with additional incoming values. diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 18d6bbc..705e046e 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1406,7 +1406,7 @@ void TargetInstrInfo::reassociateOps( const MCInstrDesc &MCID, Register DestReg) { return MachineInstrBuilder( MF, MF.CreateMachineInstr(MCID, MIMD.getDL(), /*NoImpl=*/true)) - .setPCSections(MIMD.getPCSections()) + .copyMIMetadata(MIMD) .addReg(DestReg, RegState::Define); }; diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 725e951..e9172f4 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1060,27 +1060,27 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( auto &Context = getContext(); if (Kind.isMergeableConst4() && MergeableConst4Section) - return Context.getELFSection(".rodata.cst4." + SectionSuffix, + return Context.getELFSection(".rodata.cst4." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 4); if (Kind.isMergeableConst8() && MergeableConst8Section) - return Context.getELFSection(".rodata.cst8." + SectionSuffix, + return Context.getELFSection(".rodata.cst8." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 8); if (Kind.isMergeableConst16() && MergeableConst16Section) - return Context.getELFSection(".rodata.cst16." + SectionSuffix, + return Context.getELFSection(".rodata.cst16." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 16); if (Kind.isMergeableConst32() && MergeableConst32Section) - return Context.getELFSection(".rodata.cst32." + SectionSuffix, + return Context.getELFSection(".rodata.cst32." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 32); if (Kind.isReadOnly()) - return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); + return Context.getELFSection(".rodata." + SectionSuffix + ".", + ELF::SHT_PROGBITS, ELF::SHF_ALLOC); assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return Context.getELFSection(".data.rel.ro." + SectionSuffix, + return Context.getELFSection(".data.rel.ro." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE); } |