Diffstat (limited to 'llvm/lib')
125 files changed, 2250 insertions, 1732 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index ec78386..759c553 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -929,12 +929,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, if (!AllConstantInt) break; - // TODO: Try to intersect two inrange attributes? - if (!InRange) { - InRange = GEP->getInRange(); - if (InRange) - // Adjust inrange by offset until now. - InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset); + // Adjust inrange offset and intersect inrange attributes + if (auto GEPRange = GEP->getInRange()) { + auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset); + InRange = + InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange; } Ptr = cast<Constant>(GEP->getOperand(0)); diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp index 2da6468..1959ab6 100644 --- a/llvm/lib/Analysis/DXILResource.cpp +++ b/llvm/lib/Analysis/DXILResource.cpp @@ -1079,15 +1079,16 @@ void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) { // add new space S = &BS->Spaces.emplace_back(B.Space); - // the space is full - set flag to report overlapping binding later - if (S->FreeRanges.empty()) { + // The space is full - there are no free slots left, or the rest of the + // slots are taken by an unbounded array. Set flag to report overlapping + // binding later. + if (S->FreeRanges.empty() || S->FreeRanges.back().UpperBound < UINT32_MAX) { OverlappingBinding = true; continue; } // adjust the last free range lower bound, split it in two, or remove it BindingRange &LastFreeRange = S->FreeRanges.back(); - assert(LastFreeRange.UpperBound == UINT32_MAX); if (LastFreeRange.LowerBound == B.LowerBound) { if (B.UpperBound < UINT32_MAX) LastFreeRange.LowerBound = B.UpperBound + 1; diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index dd9a44b..f1473b2 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SrcSubscripts, DstSubscripts)) return false; + assert(isLoopInvariant(SrcBase, SrcLoop) && + isLoopInvariant(DstBase, DstLoop) && + "Expected SrcBase and DstBase to be loop invariant"); + int Size = SrcSubscripts.size(); LLVM_DEBUG({ dbgs() << "\nSrcSubscripts: "; @@ -3666,6 +3670,19 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, SCEVUnionPredicate(Assume, *SE)); } + // Even if the base pointers are the same, they may not be loop-invariant. It + // could lead to incorrect results, as we're analyzing loop-carried + // dependencies. Src and Dst can be in different loops, so we need to check + // the base pointer is invariant in both loops. 
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + if (!isLoopInvariant(SrcBase, SrcLoop) || + !isLoopInvariant(DstBase, DstLoop)) { + LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n"); + return std::make_unique<Dependence>(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); + } + uint64_t EltSize = SrcLoc.Size.toRaw(); const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase); const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase); diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 82530e7..5907e21 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5366,7 +5366,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, Type *MidTy = CI->getType(); Type *DstTy = Ty; if (Src->getType() == Ty) { - auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode()); + auto FirstOp = CI->getOpcode(); auto SecondOp = static_cast<Instruction::CastOps>(CastOpc); Type *SrcIntPtrTy = SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr; diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 5d7c97a..6356d71 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -37,8 +37,8 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, // unsigned long personality; /* Pointer to the personality routine */ // } - auto *EHInfo = - cast<MCSectionXCOFF>(Asm->getObjFileLowering().getCompactUnwindSection()); + auto *EHInfo = static_cast<MCSectionXCOFF *>( + Asm->getObjFileLowering().getCompactUnwindSection()); if (Asm->TM.getFunctionSections()) { // If option -ffunction-sections is on, append the function name to the // name of EH Info Table csect so that each function has its own EH Info diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f1d3e96..6166271 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -4221,10 +4221,11 @@ MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { SectionKind Kind = CPE.getSectionKind(&DL); const Constant *C = CPE.Val.ConstVal; Align Alignment = CPE.Alignment; - if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>( - getObjFileLowering().getSectionForConstant(DL, Kind, C, - Alignment))) { - if (MCSymbol *Sym = S->getCOMDATSymbol()) { + auto *S = + getObjFileLowering().getSectionForConstant(DL, Kind, C, Alignment); + if (S && TM.getTargetTriple().isOSBinFormatCOFF()) { + if (MCSymbol *Sym = + static_cast<const MCSectionCOFF *>(S)->getCOMDATSymbol()) { if (Sym->isUndefined()) OutStreamer->emitSymbolAttribute(Sym, MCSA_Global); return Sym; diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 8abeb56..c5d6e40 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1051,10 +1051,10 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { // comdat key. A section may be comdat because of -ffunction-sections or // because it is comdat in the IR. MCSectionCOFF *GVSec = - GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr; + GVSym ? static_cast<MCSectionCOFF *>(&GVSym->getSection()) : nullptr; const MCSymbol *KeySym = GVSec ? 
GVSec->getCOMDATSymbol() : nullptr; - MCSectionCOFF *DebugSec = cast<MCSectionCOFF>( + auto *DebugSec = static_cast<MCSectionCOFF *>( CompilerInfoAsm->getObjFileLowering().getCOFFDebugSymbolsSection()); DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 11b8576..7188833 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -972,10 +972,9 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // the call graph which could lead to some target function. For tail // calls, no return PC information is needed, unless tuning for GDB in // DWARF4 mode in which case we fake a return PC for compatibility. - const MCSymbol *PCAddr = - (!IsTail || CU.useGNUAnalogForDwarf5Feature()) - ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI)) - : nullptr; + const MCSymbol *PCAddr = (!IsTail || CU.useGNUAnalogForDwarf5Feature()) + ? getLabelAfterInsn(TopLevelCallMI) + : nullptr; // For tail calls, it's necessary to record the address of the branch // instruction so that the debugger can show where the tail call occurred. diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 1cb0a23..3b3e7a4 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -2083,59 +2083,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (TBB == FBB) { MBB->splice(Loc, TBB, TBB->begin(), TIB); } else { - // Merge the debug locations, and hoist and kill the debug instructions from - // both branches. FIXME: We could probably try harder to preserve some debug - // instructions (but at least this isn't producing wrong locations). - MachineInstrBuilder MIRBuilder(*MBB->getParent(), Loc); - auto HoistAndKillDbgInstr = [MBB, Loc](MachineBasicBlock::iterator DI) { - assert(DI->isDebugInstr() && "Expected a debug instruction"); - if (DI->isDebugRef()) { - const TargetInstrInfo *TII = - MBB->getParent()->getSubtarget().getInstrInfo(); - const MCInstrDesc &DBGV = TII->get(TargetOpcode::DBG_VALUE); - DI = BuildMI(*MBB->getParent(), DI->getDebugLoc(), DBGV, false, 0, - DI->getDebugVariable(), DI->getDebugExpression()); - MBB->insert(Loc, &*DI); - return; - } - // Deleting a DBG_PHI results in an undef at the referenced DBG_INSTR_REF. - if (DI->isDebugPHI()) { - DI->eraseFromParent(); - return; - } - - DI->setDebugValueUndef(); - DI->moveBefore(&*Loc); - }; - // TIB and FIB point to the end of the regions to hoist/merge in TBB and // FBB. MachineBasicBlock::iterator FE = FIB; MachineBasicBlock::iterator FI = FBB->begin(); for (MachineBasicBlock::iterator TI : make_early_inc_range(make_range(TBB->begin(), TIB))) { - // Hoist and kill debug instructions from FBB. After this loop FI points - // to the next non-debug instruction to hoist (checked in assert after the - // TBB debug instruction handling code). - while (FI->isDebugInstr()) { - assert(FI != FE && "Unexpected end of FBB range"); - MachineBasicBlock::iterator FINext = std::next(FI); - HoistAndKillDbgInstr(FI); - FI = FINext; - } - - // Kill debug instructions before moving. - if (TI->isDebugInstr()) { - HoistAndKillDbgInstr(TI); + // Move debug instructions and pseudo probes without modifying them. + // FIXME: This is the wrong thing to do for debug locations, which + // should at least be killed (and hoisted from BOTH blocks). 
+ if (TI->isDebugOrPseudoInstr()) { + TI->moveBefore(&*Loc); + continue; } - // If FI is a debug instruction, skip forward to the next non-debug - // instruction. + // Get the next non-meta instruction in FBB. FI = skipDebugInstructionsForward(FI, FE, false); - // Pseudo probes are excluded from the range when identifying foldable - // instructions, so we don't expect to see one now. - assert(!TI->isPseudoProbe() && "Unexpected pseudo probe in range"); // NOTE: The loop above checks CheckKillDead but we can't do that here as // it modifies some kill markers after the check. assert(TI->isIdenticalTo(*FI, MachineInstr::CheckDefs) && @@ -2148,7 +2111,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { ++FI; } } - FBB->erase(FBB->begin(), FIB); if (UpdateLiveIns) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index c21058c..416c56d 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2095,6 +2095,10 @@ static bool isRemOfLoopIncrementWithLoopInvariant( if (!L->isLoopInvariant(RemAmt)) return false; + // Only works if the AddOffset is a loop invariant + if (AddOffset && !L->isLoopInvariant(AddOffset)) + return false; + // Is the PHI a loop increment? auto LoopIncrInfo = getIVIncrement(PN, LI); if (!LoopIncrInfo) diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index c2839d4..5e50898 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -634,6 +634,9 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( << " and factor = " << Factor << "\n"); } else { assert(II); + if (II->getIntrinsicID() != Intrinsic::masked_load && + II->getIntrinsicID() != Intrinsic::vp_load) + return false; // Check mask operand. Handle both all-true/false and interleaved mask. Mask = getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI)); @@ -673,6 +676,9 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( Value *Mask = nullptr; if (II) { + if (II->getIntrinsicID() != Intrinsic::masked_store && + II->getIntrinsicID() != Intrinsic::vp_store) + return false; // Check mask operand. Handle both all-true/false and interleaved mask. Mask = getMask(getMaskOperand(II), Factor, cast<VectorType>(InterleaveValues[0]->getType())); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e5704c0..583a85a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" @@ -357,6 +358,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::PATCHPOINT: Res = PromoteIntRes_PATCHPOINT(N); break; + case ISD::READ_REGISTER: + Res = PromoteIntRes_READ_REGISTER(N); + break; } // If the result is null then the sub-method took care of registering it. 
@@ -2076,6 +2080,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::PATCHPOINT: Res = PromoteIntOp_PATCHPOINT(N, OpNo); break; + case ISD::WRITE_REGISTER: + Res = PromoteIntOp_WRITE_REGISTER(N, OpNo); + break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: case ISD::EXPERIMENTAL_VP_STRIDED_STORE: Res = PromoteIntOp_VP_STRIDED(N, OpNo); @@ -2853,6 +2860,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_WRITE_REGISTER(SDNode *N, + unsigned OpNo) { + const Function &Fn = DAG.getMachineFunction().getFunction(); + Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure( + "cannot use llvm.write_register with illegal type", Fn, + N->getDebugLoc())); + return N->getOperand(0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) || (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4)); @@ -3127,6 +3143,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VSCALE: ExpandIntRes_VSCALE(N, Lo, Hi); break; + + case ISD::READ_REGISTER: + ExpandIntRes_READ_REGISTER(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -5471,6 +5491,18 @@ void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, SplitInteger(Res, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { + const Function &Fn = DAG.getMachineFunction().getFunction(); + Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure( + "cannot use llvm.read_register with illegal type", Fn, N->getDebugLoc())); + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getPOISON(LoVT); + Hi = DAG.getPOISON(HiVT); +} + //===----------------------------------------------------------------------===// // Integer Operand Expansion //===----------------------------------------------------------------------===// @@ -5537,6 +5569,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::EXPERIMENTAL_VP_STRIDED_STORE: Res = ExpandIntOp_VP_STRIDED(N, OpNo); break; + case ISD::WRITE_REGISTER: + Res = ExpandIntOp_WRITE_REGISTER(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. 
@@ -5935,6 +5970,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::ExpandIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo) { + const Function &Fn = DAG.getMachineFunction().getFunction(); + Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure( + "cannot use llvm.write_register with illegal type", Fn, + N->getDebugLoc())); + + return N->getOperand(0); +} + SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) { SDLoc dl(N); @@ -6332,6 +6376,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_PATCHPOINT(SDNode *N) { return Res.getValue(0); } +SDValue DAGTypeLegalizer::PromoteIntRes_READ_REGISTER(SDNode *N) { + const Function &Fn = DAG.getMachineFunction().getFunction(); + Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure( + "cannot use llvm.read_register with illegal type", Fn, N->getDebugLoc())); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + return DAG.getPOISON(NVT); +} + SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 9b53724..2e13b18 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -378,6 +378,7 @@ private: SDValue PromoteIntRes_VPFunnelShift(SDNode *N); SDValue PromoteIntRes_IS_FPCLASS(SDNode *N); SDValue PromoteIntRes_PATCHPOINT(SDNode *N); + SDValue PromoteIntRes_READ_REGISTER(SDNode *N); SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N); SDValue PromoteIntRes_GET_ACTIVE_LANE_MASK(SDNode *N); SDValue PromoteIntRes_PARTIAL_REDUCE_MLA(SDNode *N); @@ -428,6 +429,7 @@ private: SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N, unsigned OpNo); @@ -511,6 +513,7 @@ private: void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi); @@ -534,6 +537,7 @@ private: SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo); SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &dl); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a40ceaa..408d07b 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -995,7 +995,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForLSDA( if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections())) return LSDASection; - const auto *LSDA = cast<MCSectionELF>(LSDASection); + const auto *LSDA = static_cast<const MCSectionELF *>(LSDASection); unsigned Flags = 
LSDA->getFlags(); const MCSymbolELF *LinkedToSym = nullptr; StringRef Group; @@ -2054,14 +2054,14 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getCOFFStaticStructorSection( getContext(), getContext().getTargetTriple(), true, Priority, KeySym, - cast<MCSectionCOFF>(StaticCtorSection)); + static_cast<MCSectionCOFF *>(StaticCtorSection)); } MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getCOFFStaticStructorSection( getContext(), getContext().getTargetTriple(), false, Priority, KeySym, - cast<MCSectionCOFF>(StaticDtorSection)); + static_cast<MCSectionCOFF *>(StaticDtorSection)); } const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( @@ -2388,23 +2388,25 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, // here. if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) { if (GO->isDeclarationForLinker()) - return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM)) + return static_cast<const MCSectionXCOFF *>( + getSectionForExternalReference(GO, TM)) ->getQualNameSymbol(); if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->hasAttribute("toc-data")) - return cast<MCSectionXCOFF>( + return static_cast<const MCSectionXCOFF *>( SectionForGlobal(GVar, SectionKind::getData(), TM)) ->getQualNameSymbol(); SectionKind GOKind = getKindForGlobal(GO, TM); if (GOKind.isText()) - return cast<MCSectionXCOFF>( + return static_cast<const MCSectionXCOFF *>( getSectionForFunctionDescriptor(cast<Function>(GO), TM)) ->getQualNameSymbol(); if ((TM.getDataSections() && !GO->hasSection()) || GO->hasCommonLinkage() || GOKind.isBSSLocal() || GOKind.isThreadBSSLocal()) - return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM)) + return static_cast<const MCSectionXCOFF *>( + SectionForGlobal(GO, GOKind, TM)) ->getQualNameSymbol(); } @@ -2740,7 +2742,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA( const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const { - auto *LSDA = cast<MCSectionXCOFF>(LSDASection); + auto *LSDA = static_cast<MCSectionXCOFF *>(LSDASection); if (TM.getFunctionSections()) { // If option -ffunction-sections is on, append the function name to the // name of the LSDA csect so that each function has its own LSDA csect. 
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index b3798f1..a8559e7 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -183,7 +183,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { std::lock_guard<sys::Mutex> locked(lock); // Save information about our target - Arch = (Triple::ArchType)Obj.getArch(); + Arch = Obj.getArch(); IsTargetLittleEndian = Obj.isLittleEndian(); setMipsABI(Obj); @@ -1361,18 +1361,17 @@ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> RuntimeDyld::loadObject(const ObjectFile &Obj) { if (!Dyld) { if (Obj.isELF()) - Dyld = - createRuntimeDyldELF(static_cast<Triple::ArchType>(Obj.getArch()), - MemMgr, Resolver, ProcessAllSections, - std::move(NotifyStubEmitted)); + Dyld = createRuntimeDyldELF(Obj.getArch(), MemMgr, Resolver, + ProcessAllSections, + std::move(NotifyStubEmitted)); else if (Obj.isMachO()) - Dyld = createRuntimeDyldMachO( - static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver, - ProcessAllSections, std::move(NotifyStubEmitted)); + Dyld = createRuntimeDyldMachO(Obj.getArch(), MemMgr, Resolver, + ProcessAllSections, + std::move(NotifyStubEmitted)); else if (Obj.isCOFF()) - Dyld = createRuntimeDyldCOFF( - static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver, - ProcessAllSections, std::move(NotifyStubEmitted)); + Dyld = createRuntimeDyldCOFF(Obj.getArch(), MemMgr, Resolver, + ProcessAllSections, + std::move(NotifyStubEmitted)); else report_fatal_error("Incompatible object format!"); } diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 840ca83..7928772 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy( Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction( const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos, AttributeList FuncAttrs) { - IRBuilder<>::InsertPointGuard IPG(Builder); + InsertPointTy SavedIP = Builder.saveIP(); LLVMContext &Ctx = M.getContext(); FunctionType *FuncTy = FunctionType::get( Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()}, @@ -2630,7 +2630,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction( WcFunc->addParamAttr(1, Attribute::NoUndef); BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc); Builder.SetInsertPoint(EntryBB); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // ReduceList: thread local Reduce list. 
// At the stage of the computation when this function is called, partially @@ -2845,6 +2844,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction( } Builder.CreateRetVoid(); + Builder.restoreIP(SavedIP); return WcFunc; } @@ -2853,7 +2853,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction( ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn, AttributeList FuncAttrs) { LLVMContext &Ctx = M.getContext(); - IRBuilder<>::InsertPointGuard IPG(Builder); FunctionType *FuncTy = FunctionType::get(Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt16Ty(), @@ -2872,7 +2871,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction( SarFunc->addParamAttr(3, Attribute::SExt); BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc); Builder.SetInsertPoint(EntryBB); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Thread local Reduce list used to host the values of data to be reduced. Argument *ReduceListArg = SarFunc->getArg(0); @@ -3019,7 +3017,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction( Function *OpenMPIRBuilder::emitListToGlobalCopyFunction( ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy, AttributeList FuncAttrs) { - IRBuilder<>::InsertPointGuard IPG(Builder); + OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); LLVMContext &Ctx = M.getContext(); FunctionType *FuncTy = FunctionType::get( Builder.getVoidTy(), @@ -3035,7 +3033,6 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction( BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc); Builder.SetInsertPoint(EntryBlock); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Buffer: global reduction buffer. Argument *BufferArg = LtGCFunc->getArg(0); @@ -3123,13 +3120,14 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction( } Builder.CreateRetVoid(); + Builder.restoreIP(OldIP); return LtGCFunc; } Function *OpenMPIRBuilder::emitListToGlobalReduceFunction( ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn, Type *ReductionsBufferTy, AttributeList FuncAttrs) { - IRBuilder<>::InsertPointGuard IPG(Builder); + OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); LLVMContext &Ctx = M.getContext(); FunctionType *FuncTy = FunctionType::get( Builder.getVoidTy(), @@ -3145,7 +3143,6 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction( BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc); Builder.SetInsertPoint(EntryBlock); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Buffer: global reduction buffer. Argument *BufferArg = LtGRFunc->getArg(0); @@ -3206,13 +3203,14 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction( Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList}) ->addFnAttr(Attribute::NoUnwind); Builder.CreateRetVoid(); + Builder.restoreIP(OldIP); return LtGRFunc; } Function *OpenMPIRBuilder::emitGlobalToListCopyFunction( ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy, AttributeList FuncAttrs) { - IRBuilder<>::InsertPointGuard IPG(Builder); + OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); LLVMContext &Ctx = M.getContext(); FunctionType *FuncTy = FunctionType::get( Builder.getVoidTy(), @@ -3228,7 +3226,6 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction( BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc); Builder.SetInsertPoint(EntryBlock); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Buffer: global reduction buffer. 
Argument *BufferArg = LtGCFunc->getArg(0); @@ -3314,13 +3311,14 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction( } Builder.CreateRetVoid(); + Builder.restoreIP(OldIP); return LtGCFunc; } Function *OpenMPIRBuilder::emitGlobalToListReduceFunction( ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn, Type *ReductionsBufferTy, AttributeList FuncAttrs) { - IRBuilder<>::InsertPointGuard IPG(Builder); + OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); LLVMContext &Ctx = M.getContext(); auto *FuncTy = FunctionType::get( Builder.getVoidTy(), @@ -3336,7 +3334,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction( BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc); Builder.SetInsertPoint(EntryBlock); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Buffer: global reduction buffer. Argument *BufferArg = LtGRFunc->getArg(0); @@ -3397,6 +3394,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction( Builder.CreateCall(ReduceFn, {ReduceList, ReductionList}) ->addFnAttr(Attribute::NoUnwind); Builder.CreateRetVoid(); + Builder.restoreIP(OldIP); return LtGRFunc; } @@ -3409,7 +3407,6 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const { Expected<Function *> OpenMPIRBuilder::createReductionFunction( StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos, ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) { - IRBuilder<>::InsertPointGuard IPG(Builder); auto *FuncTy = FunctionType::get(Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, /* IsVarArg */ false); @@ -3422,7 +3419,6 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction( BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", ReductionFunc); Builder.SetInsertPoint(EntryBB); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Need to alloca memory here and deal with the pointers before getting // LHS/RHS pointers out @@ -3750,12 +3746,10 @@ static Error populateReductionFunction( Function *ReductionFunc, ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos, IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) { - IRBuilder<>::InsertPointGuard IPG(Builder); Module *Module = ReductionFunc->getParent(); BasicBlock *ReductionFuncBlock = BasicBlock::Create(Module->getContext(), "", ReductionFunc); Builder.SetInsertPoint(ReductionFuncBlock); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); Value *LHSArrayPtr = nullptr; Value *RHSArrayPtr = nullptr; if (IsGPU) { diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp index b94dcac..4f37624 100644 --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ b/llvm/lib/IR/DiagnosticInfo.cpp @@ -81,6 +81,10 @@ void DiagnosticInfoInlineAsm::print(DiagnosticPrinter &DP) const { DP << " at line " << getLocCookie(); } +void DiagnosticInfoLegalizationFailure::print(DiagnosticPrinter &DP) const { + DP << getLocationStr() << ": " << getMsgStr(); +} + DiagnosticInfoRegAllocFailure::DiagnosticInfoRegAllocFailure( const Twine &MsgStr, const Function &Fn, const DiagnosticLocation &DL, DiagnosticSeverity Severity) diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt index d662c42..18a85b3 100644 --- a/llvm/lib/MC/CMakeLists.txt +++ b/llvm/lib/MC/CMakeLists.txt @@ -43,13 +43,7 @@ add_llvm_component_library(LLVMMC MCRegisterInfo.cpp MCSchedule.cpp MCSection.cpp - MCSectionCOFF.cpp - MCSectionDXContainer.cpp - MCSectionELF.cpp - MCSectionGOFF.cpp MCSectionMachO.cpp - MCSectionWasm.cpp - MCSectionXCOFF.cpp MCStreamer.cpp MCSPIRVStreamer.cpp 
MCSubtargetInfo.cpp diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 4510d4db..ae8dffc 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1087,7 +1087,8 @@ uint64_t ELFWriter::writeObject() { // Remember the offset into the file for this section. const uint64_t SecStart = align(RelSection->getAlign()); - writeRelocations(cast<MCSectionELF>(*RelSection->getLinkedToSection())); + writeRelocations( + static_cast<const MCSectionELF &>(*RelSection->getLinkedToSection())); uint64_t SecEnd = W.OS.tell(); RelSection->setOffsets(SecStart, SecEnd); @@ -1260,7 +1261,7 @@ bool ELFObjectWriter::useSectionSymbol(const MCValue &Val, // that it pointed to another string and subtracting 42 at runtime will // produce the wrong value. if (Sym->isInSection()) { - auto &Sec = cast<MCSectionELF>(Sym->getSection()); + auto &Sec = static_cast<const MCSectionELF &>(Sym->getSection()); unsigned Flags = Sec.getFlags(); if (Flags & ELF::SHF_MERGE) { if (C != 0) @@ -1312,13 +1313,14 @@ bool ELFObjectWriter::checkRelocation(SMLoc Loc, const MCSectionELF *From, void ELFObjectWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - const MCSectionELF &Section = cast<MCSectionELF>(*F.getParent()); + auto &Section = static_cast<const MCSectionELF &>(*F.getParent()); MCContext &Ctx = getContext(); const auto *SymA = cast_or_null<MCSymbolELF>(Target.getAddSym()); - const MCSectionELF *SecA = (SymA && SymA->isInSection()) - ? cast<MCSectionELF>(&SymA->getSection()) - : nullptr; + const MCSectionELF *SecA = + (SymA && SymA->isInSection()) + ? static_cast<const MCSectionELF *>(&SymA->getSection()) + : nullptr; if (DwoOS && !checkRelocation(Fixup.getLoc(), &Section, SecA)) return; diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp index 1871f5f..88188f3 100644 --- a/llvm/lib/MC/GOFFObjectWriter.cpp +++ b/llvm/lib/MC/GOFFObjectWriter.cpp @@ -336,7 +336,7 @@ void GOFFWriter::defineSymbols() { unsigned Ordinal = 0; // Process all sections. for (MCSection &S : Asm) { - auto &Section = cast<MCSectionGOFF>(S); + auto &Section = static_cast<MCSectionGOFF &>(S); Section.setOrdinal(++Ordinal); defineSectionSymbols(Section); } diff --git a/llvm/lib/MC/MCAsmInfoCOFF.cpp b/llvm/lib/MC/MCAsmInfoCOFF.cpp index 0b8781c..54717df 100644 --- a/llvm/lib/MC/MCAsmInfoCOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoCOFF.cpp @@ -12,7 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoCOFF.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> using namespace llvm; @@ -49,6 +55,10 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { HasCOFFComdatConstants = true; } +bool MCAsmInfoCOFF::useCodeAlign(const MCSection &Sec) const { + return Sec.isText(); +} + void MCAsmInfoMicrosoft::anchor() {} MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() = default; @@ -64,3 +74,101 @@ MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() { // We don't create constants in comdat sections for MinGW. HasCOFFComdatConstants = false; } + +bool MCSectionCOFF::shouldOmitSectionDirective(StringRef Name) const { + if (COMDATSymbol || isUnique()) + return false; + + // FIXME: Does .section .bss/.data/.text work everywhere?? 
+ if (Name == ".text" || Name == ".data" || Name == ".bss") + return true; + + return false; +} + +void MCSectionCOFF::setSelection(int Selection) const { + assert(Selection != 0 && "invalid COMDAT selection type"); + this->Selection = Selection; + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; +} + +void MCAsmInfoCOFF::printSwitchToSection(const MCSection &Section, uint32_t, + const Triple &T, + raw_ostream &OS) const { + auto &Sec = static_cast<const MCSectionCOFF &>(Section); + // standard sections don't require the '.section' + if (Sec.shouldOmitSectionDirective(Sec.getName())) { + OS << '\t' << Sec.getName() << '\n'; + return; + } + + OS << "\t.section\t" << Sec.getName() << ",\""; + if (Sec.getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + OS << 'd'; + if (Sec.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + OS << 'b'; + if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE) + OS << 'x'; + if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE) + OS << 'w'; + else if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_READ) + OS << 'r'; + else + OS << 'y'; + if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE) + OS << 'n'; + if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED) + OS << 's'; + if ((Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) && + !Sec.isImplicitlyDiscardable(Sec.getName())) + OS << 'D'; + if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_INFO) + OS << 'i'; + OS << '"'; + + // unique should be tail of .section directive. + if (Sec.isUnique() && !Sec.COMDATSymbol) + OS << ",unique," << Sec.UniqueID; + + if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { + if (Sec.COMDATSymbol) + OS << ","; + else + OS << "\n\t.linkonce\t"; + switch (Sec.Selection) { + case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: + OS << "one_only"; + break; + case COFF::IMAGE_COMDAT_SELECT_ANY: + OS << "discard"; + break; + case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: + OS << "same_size"; + break; + case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: + OS << "same_contents"; + break; + case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE: + OS << "associative"; + break; + case COFF::IMAGE_COMDAT_SELECT_LARGEST: + OS << "largest"; + break; + case COFF::IMAGE_COMDAT_SELECT_NEWEST: + OS << "newest"; + break; + default: + assert(false && "unsupported COFF selection type"); + break; + } + if (Sec.COMDATSymbol) { + OS << ","; + Sec.COMDATSymbol->print(OS, this); + } + } + + if (Sec.isUnique() && Sec.COMDATSymbol) + OS << ",unique," << Sec.UniqueID; + + OS << '\n'; +} diff --git a/llvm/lib/MC/MCAsmInfoDarwin.cpp b/llvm/lib/MC/MCAsmInfoDarwin.cpp index 9cba775..e156fa0 100644 --- a/llvm/lib/MC/MCAsmInfoDarwin.cpp +++ b/llvm/lib/MC/MCAsmInfoDarwin.cpp @@ -85,3 +85,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { DwarfUsesRelocationsAcrossSections = false; SetDirectiveSuppressesReloc = true; } + +bool MCAsmInfoDarwin::useCodeAlign(const MCSection &Sec) const { + return static_cast<const MCSectionMachO &>(Sec).hasAttribute( + MachO::S_ATTR_PURE_INSTRUCTIONS); +} diff --git a/llvm/lib/MC/MCAsmInfoELF.cpp b/llvm/lib/MC/MCAsmInfoELF.cpp index 7eb89ef..cdae9d7 100644 --- a/llvm/lib/MC/MCAsmInfoELF.cpp +++ b/llvm/lib/MC/MCAsmInfoELF.cpp @@ -12,9 +12,16 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoELF.h" +#include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" +#include 
"llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Triple.h" +#include <cassert> using namespace llvm; @@ -28,9 +35,198 @@ MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const { return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0); } +bool MCAsmInfoELF::useCodeAlign(const MCSection &Sec) const { + return static_cast<const MCSectionELF &>(Sec).getFlags() & ELF::SHF_EXECINSTR; +} + MCAsmInfoELF::MCAsmInfoELF() { HasIdentDirective = true; WeakRefDirective = "\t.weak\t"; PrivateGlobalPrefix = ".L"; PrivateLabelPrefix = ".L"; } + +static void printName(raw_ostream &OS, StringRef Name) { + if (Name.find_first_not_of("0123456789_." + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) { + OS << Name; + return; + } + OS << '"'; + for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) { + if (*B == '"') // Unquoted " + OS << "\\\""; + else if (*B != '\\') // Neither " or backslash + OS << *B; + else if (B + 1 == E) // Trailing backslash + OS << "\\\\"; + else { + OS << B[0] << B[1]; // Quoted character + ++B; + } + } + OS << '"'; +} + +void MCAsmInfoELF::printSwitchToSection(const MCSection &Section, + uint32_t Subsection, const Triple &T, + raw_ostream &OS) const { + auto &Sec = static_cast<const MCSectionELF &>(Section); + if (!Sec.isUnique() && shouldOmitSectionDirective(Sec.getName())) { + OS << '\t' << Sec.getName(); + if (Subsection) + OS << '\t' << Subsection; + OS << '\n'; + return; + } + + OS << "\t.section\t"; + printName(OS, Sec.getName()); + + // Handle the weird solaris syntax if desired. + if (usesSunStyleELFSectionSwitchSyntax() && !(Sec.Flags & ELF::SHF_MERGE)) { + if (Sec.Flags & ELF::SHF_ALLOC) + OS << ",#alloc"; + if (Sec.Flags & ELF::SHF_EXECINSTR) + OS << ",#execinstr"; + if (Sec.Flags & ELF::SHF_WRITE) + OS << ",#write"; + if (Sec.Flags & ELF::SHF_EXCLUDE) + OS << ",#exclude"; + if (Sec.Flags & ELF::SHF_TLS) + OS << ",#tls"; + OS << '\n'; + return; + } + + OS << ",\""; + if (Sec.Flags & ELF::SHF_ALLOC) + OS << 'a'; + if (Sec.Flags & ELF::SHF_EXCLUDE) + OS << 'e'; + if (Sec.Flags & ELF::SHF_EXECINSTR) + OS << 'x'; + if (Sec.Flags & ELF::SHF_WRITE) + OS << 'w'; + if (Sec.Flags & ELF::SHF_MERGE) + OS << 'M'; + if (Sec.Flags & ELF::SHF_STRINGS) + OS << 'S'; + if (Sec.Flags & ELF::SHF_TLS) + OS << 'T'; + if (Sec.Flags & ELF::SHF_LINK_ORDER) + OS << 'o'; + if (Sec.Flags & ELF::SHF_GROUP) + OS << 'G'; + if (Sec.Flags & ELF::SHF_GNU_RETAIN) + OS << 'R'; + + // If there are os-specific flags, print them. + if (T.isOSSolaris()) + if (Sec.Flags & ELF::SHF_SUNW_NODISCARD) + OS << 'R'; + + // If there are tarSec.get-specific flags, print them. + Triple::ArchType Arch = T.getArch(); + if (Arch == Triple::xcore) { + if (Sec.Flags & ELF::XCORE_SHF_CP_SECTION) + OS << 'c'; + if (Sec.Flags & ELF::XCORE_SHF_DP_SECTION) + OS << 'd'; + } else if (T.isARM() || T.isThumb()) { + if (Sec.Flags & ELF::SHF_ARM_PURECODE) + OS << 'y'; + } else if (T.isAArch64()) { + if (Sec.Flags & ELF::SHF_AARCH64_PURECODE) + OS << 'y'; + } else if (Arch == Triple::hexagon) { + if (Sec.Flags & ELF::SHF_HEX_GPREL) + OS << 's'; + } else if (Arch == Triple::x86_64) { + if (Sec.Flags & ELF::SHF_X86_64_LARGE) + OS << 'l'; + } + + OS << '"'; + + OS << ','; + + // If comment string is '@', e.g. 
as on ARM - use '%' instead + if (getCommentString()[0] == '@') + OS << '%'; + else + OS << '@'; + + if (Sec.Type == ELF::SHT_INIT_ARRAY) + OS << "init_array"; + else if (Sec.Type == ELF::SHT_FINI_ARRAY) + OS << "fini_array"; + else if (Sec.Type == ELF::SHT_PREINIT_ARRAY) + OS << "preinit_array"; + else if (Sec.Type == ELF::SHT_NOBITS) + OS << "nobits"; + else if (Sec.Type == ELF::SHT_NOTE) + OS << "note"; + else if (Sec.Type == ELF::SHT_PROGBITS) + OS << "progbits"; + else if (Sec.Type == ELF::SHT_X86_64_UNWIND) + OS << "unwind"; + else if (Sec.Type == ELF::SHT_MIPS_DWARF) + // Print hex value of the flag while we do not have + // any standard symbolic representation of the flag. + OS << "0x7000001e"; + else if (Sec.Type == ELF::SHT_LLVM_ODRTAB) + OS << "llvm_odrtab"; + else if (Sec.Type == ELF::SHT_LLVM_LINKER_OPTIONS) + OS << "llvm_linker_options"; + else if (Sec.Type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE) + OS << "llvm_call_graph_profile"; + else if (Sec.Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES) + OS << "llvm_dependent_libraries"; + else if (Sec.Type == ELF::SHT_LLVM_SYMPART) + OS << "llvm_sympart"; + else if (Sec.Type == ELF::SHT_LLVM_BB_ADDR_MAP) + OS << "llvm_bb_addr_map"; + else if (Sec.Type == ELF::SHT_LLVM_OFFLOADING) + OS << "llvm_offloading"; + else if (Sec.Type == ELF::SHT_LLVM_LTO) + OS << "llvm_lto"; + else if (Sec.Type == ELF::SHT_LLVM_JT_SIZES) + OS << "llvm_jt_sizes"; + else if (Sec.Type == ELF::SHT_LLVM_CFI_JUMP_TABLE) + OS << "llvm_cfi_jump_table"; + else + OS << "0x" << Twine::utohexstr(Sec.Type); + + if (Sec.EntrySize) { + assert((Sec.Flags & ELF::SHF_MERGE) || + Sec.Type == ELF::SHT_LLVM_CFI_JUMP_TABLE); + OS << "," << Sec.EntrySize; + } + + if (Sec.Flags & ELF::SHF_LINK_ORDER) { + OS << ","; + if (Sec.LinkedToSym) + printName(OS, Sec.LinkedToSym->getName()); + else + OS << '0'; + } + + if (Sec.Flags & ELF::SHF_GROUP) { + OS << ","; + printName(OS, Sec.Group.getPointer()->getName()); + if (Sec.isComdat()) + OS << ",comdat"; + } + + if (Sec.isUnique()) + OS << ",unique," << Sec.UniqueID; + + OS << '\n'; + + if (Subsection) { + OS << "\t.subsection\t" << Subsection; + OS << '\n'; + } +} diff --git a/llvm/lib/MC/MCAsmInfoGOFF.cpp b/llvm/lib/MC/MCAsmInfoGOFF.cpp index 3c81a46..0a5d1927 100644 --- a/llvm/lib/MC/MCAsmInfoGOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoGOFF.cpp @@ -13,11 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoGOFF.h" +#include "llvm/BinaryFormat/GOFF.h" +#include "llvm/MC/MCSectionGOFF.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; -void MCAsmInfoGOFF::anchor() {} - MCAsmInfoGOFF::MCAsmInfoGOFF() { Data64bitsDirective = "\t.quad\t"; HasDotTypeDotSizeDirective = false; @@ -25,3 +26,136 @@ MCAsmInfoGOFF::MCAsmInfoGOFF() { PrivateLabelPrefix = "L#"; ZeroDirective = "\t.space\t"; } + +static void emitCATTR(raw_ostream &OS, StringRef Name, GOFF::ESDRmode Rmode, + GOFF::ESDAlignment Alignment, + GOFF::ESDLoadingBehavior LoadBehavior, + GOFF::ESDExecutable Executable, bool IsReadOnly, + uint32_t SortKey, uint8_t FillByteValue, + StringRef PartName) { + OS << Name << " CATTR "; + OS << "ALIGN(" << static_cast<unsigned>(Alignment) << ")," + << "FILL(" << static_cast<unsigned>(FillByteValue) << ")"; + switch (LoadBehavior) { + case GOFF::ESD_LB_Deferred: + OS << ",DEFLOAD"; + break; + case GOFF::ESD_LB_NoLoad: + OS << ",NOLOAD"; + break; + default: + break; + } + switch (Executable) { + case GOFF::ESD_EXE_CODE: + OS << ",EXECUTABLE"; + break; + case GOFF::ESD_EXE_DATA: + OS << 
",NOTEXECUTABLE"; + break; + default: + break; + } + if (IsReadOnly) + OS << ",READONLY"; + if (Rmode != GOFF::ESD_RMODE_None) { + OS << ','; + OS << "RMODE("; + switch (Rmode) { + case GOFF::ESD_RMODE_24: + OS << "24"; + break; + case GOFF::ESD_RMODE_31: + OS << "31"; + break; + case GOFF::ESD_RMODE_64: + OS << "64"; + break; + case GOFF::ESD_RMODE_None: + break; + } + OS << ')'; + } + if (SortKey) + OS << ",PRIORITY(" << SortKey << ")"; + if (!PartName.empty()) + OS << ",PART(" << PartName << ")"; + OS << '\n'; +} + +static void emitXATTR(raw_ostream &OS, StringRef Name, + GOFF::ESDLinkageType Linkage, + GOFF::ESDExecutable Executable, + GOFF::ESDBindingScope BindingScope) { + OS << Name << " XATTR "; + OS << "LINKAGE(" << (Linkage == GOFF::ESD_LT_OS ? "OS" : "XPLINK") << "),"; + if (Executable != GOFF::ESD_EXE_Unspecified) + OS << "REFERENCE(" << (Executable == GOFF::ESD_EXE_CODE ? "CODE" : "DATA") + << "),"; + if (BindingScope != GOFF::ESD_BSC_Unspecified) { + OS << "SCOPE("; + switch (BindingScope) { + case GOFF::ESD_BSC_Section: + OS << "SECTION"; + break; + case GOFF::ESD_BSC_Module: + OS << "MODULE"; + break; + case GOFF::ESD_BSC_Library: + OS << "LIBRARY"; + break; + case GOFF::ESD_BSC_ImportExport: + OS << "EXPORT"; + break; + default: + break; + } + OS << ')'; + } + OS << '\n'; +} + +void MCAsmInfoGOFF::printSwitchToSection(const MCSection &Section, + uint32_t Subsection, const Triple &T, + raw_ostream &OS) const { + auto &Sec = + const_cast<MCSectionGOFF &>(static_cast<const MCSectionGOFF &>(Section)); + switch (Sec.SymbolType) { + case GOFF::ESD_ST_SectionDefinition: { + OS << Sec.getName() << " CSECT\n"; + Sec.Emitted = true; + break; + } + case GOFF::ESD_ST_ElementDefinition: { + printSwitchToSection(*Sec.getParent(), Subsection, T, OS); + if (!Sec.Emitted) { + emitCATTR(OS, Sec.getName(), Sec.EDAttributes.Rmode, + Sec.EDAttributes.Alignment, Sec.EDAttributes.LoadBehavior, + GOFF::ESD_EXE_Unspecified, Sec.EDAttributes.IsReadOnly, 0, + Sec.EDAttributes.FillByteValue, StringRef()); + Sec.Emitted = true; + } else + OS << Sec.getName() << " CATTR\n"; + break; + } + case GOFF::ESD_ST_PartReference: { + MCSectionGOFF *ED = Sec.getParent(); + printSwitchToSection(*ED->getParent(), Subsection, T, OS); + if (!Sec.Emitted) { + emitCATTR(OS, ED->getName(), ED->getEDAttributes().Rmode, + ED->EDAttributes.Alignment, ED->EDAttributes.LoadBehavior, + Sec.PRAttributes.Executable, ED->EDAttributes.IsReadOnly, + Sec.PRAttributes.SortKey, ED->EDAttributes.FillByteValue, + Sec.getName()); + emitXATTR(OS, Sec.getName(), Sec.PRAttributes.Linkage, + Sec.PRAttributes.Executable, Sec.PRAttributes.BindingScope); + ED->Emitted = true; + Sec.Emitted = true; + } else + OS << ED->getName() << " CATTR PART(" << Sec.getName() << ")\n"; + break; + } + default: + llvm_unreachable("Wrong section type"); + } +} diff --git a/llvm/lib/MC/MCAsmInfoWasm.cpp b/llvm/lib/MC/MCAsmInfoWasm.cpp index ce6ec7e..5e44f48 100644 --- a/llvm/lib/MC/MCAsmInfoWasm.cpp +++ b/llvm/lib/MC/MCAsmInfoWasm.cpp @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoWasm.h" -using namespace llvm; +#include "llvm/MC/MCSectionWasm.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/Support/raw_ostream.h" -void MCAsmInfoWasm::anchor() {} +using namespace llvm; MCAsmInfoWasm::MCAsmInfoWasm() { HasIdentDirective = true; @@ -23,3 +25,80 @@ MCAsmInfoWasm::MCAsmInfoWasm() { PrivateGlobalPrefix = ".L"; PrivateLabelPrefix = ".L"; } + +static void printName(raw_ostream &OS, 
StringRef Name) { + if (Name.find_first_not_of("0123456789_." + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) { + OS << Name; + return; + } + OS << '"'; + for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) { + if (*B == '"') // Unquoted " + OS << "\\\""; + else if (*B != '\\') // Neither " or backslash + OS << *B; + else if (B + 1 == E) // Trailing backslash + OS << "\\\\"; + else { + OS << B[0] << B[1]; // Quoted character + ++B; + } + } + OS << '"'; +} + +void MCAsmInfoWasm::printSwitchToSection(const MCSection &Section, + uint32_t Subsection, const Triple &T, + raw_ostream &OS) const { + auto &Sec = static_cast<const MCSectionWasm &>(Section); + if (shouldOmitSectionDirective(Sec.getName())) { + OS << '\t' << Sec.getName(); + if (Subsection) + OS << '\t' << Subsection; + OS << '\n'; + return; + } + + OS << "\t.section\t"; + printName(OS, Sec.getName()); + OS << ",\""; + + if (Sec.IsPassive) + OS << 'p'; + if (Sec.Group) + OS << 'G'; + if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_STRINGS) + OS << 'S'; + if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_TLS) + OS << 'T'; + if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_RETAIN) + OS << 'R'; + + OS << '"'; + + OS << ','; + + // If comment string is '@', e.g. as on ARM - use '%' instead + if (getCommentString()[0] == '@') + OS << '%'; + else + OS << '@'; + + // TODO: Print section type. + + if (Sec.Group) { + OS << ","; + printName(OS, Sec.Group->getName()); + OS << ",comdat"; + } + + if (Sec.isUnique()) + OS << ",unique," << Sec.UniqueID; + + OS << '\n'; + + if (Subsection) + OS << "\t.subsection\t" << Subsection << '\n'; +} diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp index 6ef11ba..0403b44 100644 --- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp @@ -8,7 +8,11 @@ #include "llvm/MC/MCAsmInfoXCOFF.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -16,8 +20,6 @@ namespace llvm { extern cl::opt<cl::boolOrDefault> UseLEB128Directives; } -void MCAsmInfoXCOFF::anchor() {} - MCAsmInfoXCOFF::MCAsmInfoXCOFF() { IsAIX = true; IsLittleEndian = false; @@ -56,3 +58,121 @@ bool MCAsmInfoXCOFF::isAcceptableChar(char C) const { // any combination of these. 
return isAlnum(C) || C == '_' || C == '.'; } + +bool MCAsmInfoXCOFF::useCodeAlign(const MCSection &Sec) const { + return static_cast<const MCSectionXCOFF &>(Sec).getKind().isText(); +} + +MCSectionXCOFF::~MCSectionXCOFF() = default; + +void MCSectionXCOFF::printCsectDirective(raw_ostream &OS) const { + OS << "\t.csect " << QualName->getName() << "," << Log2(getAlign()) << '\n'; +} + +void MCAsmInfoXCOFF::printSwitchToSection(const MCSection &Section, uint32_t, + const Triple &T, + raw_ostream &OS) const { + auto &Sec = static_cast<const MCSectionXCOFF &>(Section); + if (Sec.getKind().isText()) { + if (Sec.getMappingClass() != XCOFF::XMC_PR) + report_fatal_error("Unhandled storage-mapping class for .text csect"); + + Sec.printCsectDirective(OS); + return; + } + + if (Sec.getKind().isReadOnly()) { + if (Sec.getMappingClass() != XCOFF::XMC_RO && + Sec.getMappingClass() != XCOFF::XMC_TD) + report_fatal_error("Unhandled storage-mapping class for .rodata csect."); + Sec.printCsectDirective(OS); + return; + } + + if (Sec.getKind().isReadOnlyWithRel()) { + if (Sec.getMappingClass() != XCOFF::XMC_RW && + Sec.getMappingClass() != XCOFF::XMC_RO && + Sec.getMappingClass() != XCOFF::XMC_TD) + report_fatal_error( + "Unexepected storage-mapping class for ReadOnlyWithRel kind"); + Sec.printCsectDirective(OS); + return; + } + + // Initialized TLS data. + if (Sec.getKind().isThreadData()) { + // We only expect XMC_TL here for initialized TLS data. + if (Sec.getMappingClass() != XCOFF::XMC_TL) + report_fatal_error("Unhandled storage-mapping class for .tdata csect."); + Sec.printCsectDirective(OS); + return; + } + + if (Sec.getKind().isData()) { + switch (Sec.getMappingClass()) { + case XCOFF::XMC_RW: + case XCOFF::XMC_DS: + case XCOFF::XMC_TD: + Sec.printCsectDirective(OS); + break; + case XCOFF::XMC_TC: + case XCOFF::XMC_TE: + break; + case XCOFF::XMC_TC0: + OS << "\t.toc\n"; + break; + default: + report_fatal_error("Unhandled storage-mapping class for .data csect."); + } + return; + } + + if (Sec.isCsect() && Sec.getMappingClass() == XCOFF::XMC_TD) { + // Common csect type (uninitialized storage) does not have to print + // csect directive for section switching unless it is local. + if (Sec.getKind().isCommon() && !Sec.getKind().isBSSLocal()) + return; + + assert(Sec.getKind().isBSS() && "Unexpected section kind for toc-data"); + Sec.printCsectDirective(OS); + return; + } + // Common csect type (uninitialized storage) does not have to print csect + // directive for section switching. + if (Sec.isCsect() && Sec.getCSectType() == XCOFF::XTY_CM) { + assert((Sec.getMappingClass() == XCOFF::XMC_RW || + Sec.getMappingClass() == XCOFF::XMC_BS || + Sec.getMappingClass() == XCOFF::XMC_UL) && + "Generated a storage-mapping class for a common/bss/tbss csect we " + "don't " + "understand how to switch to."); + // Common symbols and local zero-initialized symbols for TLS and Non-TLS are + // eligible for .bss/.tbss csect, getKind().isThreadBSS() is used to + // cover TLS common and zero-initialized local symbols since linkage type + // (in the GlobalVariable) is not accessible in this class. + assert((Sec.getKind().isBSSLocal() || Sec.getKind().isCommon() || + Sec.getKind().isThreadBSS()) && + "wrong symbol type for .bss/.tbss csect"); + // Don't have to print a directive for switching to section for commons + // and zero-initialized TLS data. The '.comm' and '.lcomm' directives of the + // variable will create the needed csect. 
+ return; + } + + // Zero-initialized TLS data with weak or external linkage are not eligible to + // be put into common csect. + if (Sec.getKind().isThreadBSS()) { + Sec.printCsectDirective(OS); + return; + } + + // XCOFF debug sections. + if (Sec.getKind().isMetadata() && Sec.isDwarfSect()) { + OS << "\n\t.dwsect " << format("0x%" PRIx32, *Sec.getDwarfSubtypeFlags()) + << '\n'; + OS << Sec.getName() << ':' << '\n'; + return; + } + + report_fatal_error("Printing for this SectionKind is unimplemented."); +} diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 7119ef4..da51da4 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -532,8 +532,8 @@ void MCAsmStreamer::switchSection(MCSection *Section, uint32_t Subsection) { if (MCTargetStreamer *TS = getTargetStreamer()) { TS->changeSection(Cur.first, Section, Subsection, OS); } else { - Section->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS, - Subsection); + MAI->printSwitchToSection(*Section, Subsection, + getContext().getTargetTriple(), OS); } } MCStreamer::switchSection(Section, Subsection); @@ -543,7 +543,7 @@ bool MCAsmStreamer::popSection() { if (!MCStreamer::popSection()) return false; auto [Sec, Subsec] = getCurrentSection(); - Sec->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS, Subsec); + MAI->printSwitchToSection(*Sec, Subsec, getContext().getTargetTriple(), OS); return true; } @@ -1105,7 +1105,7 @@ void MCAsmStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol, // Note: a .zerofill directive does not switch sections. OS << ".zerofill "; - assert(Section->getVariant() == MCSection::SV_MachO && + assert(getContext().getObjectFileType() == MCContext::IsMachO && ".zerofill is a Mach-O specific directive"); // This is a mach-o specific directive. @@ -1130,7 +1130,7 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, // Instead of using the Section we'll just use the shortcut. - assert(Section->getVariant() == MCSection::SV_MachO && + assert(getContext().getObjectFileType() == MCContext::IsMachO && ".zerofill is a Mach-O specific directive"); // This is a mach-o specific directive and section. diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index c0448ae..39bf628 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -767,8 +767,8 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, if (Selection != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE && COMDATSymbol->isDefined() && (!COMDATSymbol->isInSection() || - cast<MCSectionCOFF>(COMDATSymbol->getSection()).getCOMDATSymbol() != - COMDATSymbol)) + static_cast<const MCSectionCOFF &>(COMDATSymbol->getSection()) + .getCOMDATSymbol() != COMDATSymbol)) reportError(SMLoc(), "invalid symbol redefinition"); } diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index dbb2fd1..c24c82d 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -346,17 +346,16 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, Displacement *= -1; } - // Track whether B is before a relaxable instruction and whether A is after - // a relaxable instruction. If SA and SB are separated by a linker-relaxable - // instruction, the difference cannot be resolved as it may be changed by - // the linker. + // Track whether B is before a relaxable instruction/alignment and whether A + // is after a relaxable instruction/alignment. 
If SA and SB are separated by + // a linker-relaxable instruction/alignment, the difference cannot be + // resolved as it may be changed by the linker. bool BBeforeRelax = false, AAfterRelax = false; for (auto F = FB; F; F = F->getNext()) { - auto DF = F->getKind() == MCFragment::FT_Data ? F : nullptr; - if (DF && DF->isLinkerRelaxable()) { - if (&*F != FB || SBOffset != DF->getContents().size()) + if (F && F->isLinkerRelaxable()) { + if (&*F != FB || SBOffset != F->getSize()) BBeforeRelax = true; - if (&*F != FA || SAOffset == DF->getContents().size()) + if (&*F != FA || SAOffset == F->getSize()) AAfterRelax = true; if (BBeforeRelax && AAfterRelax) return; @@ -370,17 +369,15 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, } int64_t Num; - if (DF) { - Displacement += DF->getContents().size(); - } else if (F->getKind() == MCFragment::FT_Relaxable && + if (F->getKind() == MCFragment::FT_Data) { + Displacement += F->getFixedSize(); + } else if ((F->getKind() == MCFragment::FT_Relaxable || + F->getKind() == MCFragment::FT_Align) && Asm->hasFinalLayout()) { // Before finishLayout, a relaxable fragment's size is indeterminate. // After layout, during relocation generation, it can be treated as a // data fragment. Displacement += F->getSize(); - } else if (F->getKind() == MCFragment::FT_Align && Layout && - F->isLinkerRelaxable()) { - Displacement += Asm->computeFragmentSize(*F); } else if (auto *FF = dyn_cast<MCFillFragment>(F); FF && FF->getNumValues().evaluateAsAbsolute(Num)) { Displacement += Num * FF->getValueSize(); diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index 3c395e5..6cbdf74 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -35,7 +35,7 @@ MCFragment::MCFragment(FragmentType Kind, bool HasInstructions) } const MCSymbol *MCFragment::getAtom() const { - return cast<MCSectionMachO>(Parent)->getAtom(LayoutOrder); + return static_cast<const MCSectionMachO *>(Parent)->getAtom(LayoutOrder); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index 4934815..1074669 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -443,13 +443,13 @@ void MCMachOStreamer::finishImpl() { // Set the fragment atom associations by tracking the last seen atom defining // symbol. 
for (MCSection &Sec : getAssembler()) { - cast<MCSectionMachO>(Sec).allocAtoms(); + static_cast<MCSectionMachO &>(Sec).allocAtoms(); const MCSymbol *CurrentAtom = nullptr; size_t I = 0; for (MCFragment &Frag : Sec) { if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag)) CurrentAtom = Symbol; - cast<MCSectionMachO>(Sec).setAtom(I++, CurrentAtom); + static_cast<MCSectionMachO &>(Sec).setAtom(I++, CurrentAtom); } } diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 37b6629..9c7b05b 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -280,11 +280,6 @@ void MCObjectStreamer::changeSection(MCSection *Section, uint32_t Subsection) { } } -void MCObjectStreamer::switchSectionNoPrint(MCSection *Section) { - MCStreamer::switchSectionNoPrint(Section); - changeSection(Section, 0); -} - void MCObjectStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) { getAssembler().registerSymbol(*Symbol); MCStreamer::emitAssignment(Symbol, Value); @@ -345,31 +340,33 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst, MCFragment *F = getCurrentFragment(); // Append the instruction to the data fragment. - size_t FixupStartIndex = F->getFixups().size(); size_t CodeOffset = F->getContents().size(); SmallVector<MCFixup, 1> Fixups; getAssembler().getEmitter().encodeInstruction( Inst, F->getContentsForAppending(), Fixups, STI); F->doneAppending(); - if (!Fixups.empty()) - F->appendFixups(Fixups); F->setHasInstructions(STI); + if (Fixups.empty()) + return; bool MarkedLinkerRelaxable = false; - for (auto &Fixup : MutableArrayRef(F->getFixups()).slice(FixupStartIndex)) { + for (auto &Fixup : Fixups) { Fixup.setOffset(Fixup.getOffset() + CodeOffset); - if (!Fixup.isLinkerRelaxable()) + if (!Fixup.isLinkerRelaxable() || MarkedLinkerRelaxable) continue; - F->setLinkerRelaxable(); + MarkedLinkerRelaxable = true; + // Set the fragment's order within the subsection for use by + // MCAssembler::relaxAlign. + auto *Sec = F->getParent(); + if (!Sec->isLinkerRelaxable()) + Sec->setLinkerRelaxable(); // Do not add data after a linker-relaxable instruction. The difference // between a new label and a label at or before the linker-relaxable // instruction cannot be resolved at assemble-time. - if (!MarkedLinkerRelaxable) { - MarkedLinkerRelaxable = true; - getCurrentSectionOnly()->setLinkerRelaxable(); - newFragment(); - } + F->setLinkerRelaxable(); + newFragment(); } + F->appendFixups(Fixups); } void MCObjectStreamer::emitInstToFragment(const MCInst &Inst, diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index eda5e8c..9f64a98 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3413,7 +3413,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, uint8_t ValueSize) { // Check whether we should use optimal code alignment for this .align // directive. 
- if (Section->useCodeAlign() && !HasFillExpr) { + if (MAI.useCodeAlign(*Section) && !HasFillExpr) { getStreamer().emitCodeAlignment( Align(Alignment), &getTargetParser().getSTI(), MaxBytesToFill); } else { diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index c7c3df3..2e251cc 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -644,8 +644,8 @@ EndStmt: } if (UseLastGroup) { - if (const MCSectionELF *Section = - cast_or_null<MCSectionELF>(getStreamer().getCurrentSectionOnly())) + if (auto *Section = static_cast<const MCSectionELF *>( + getStreamer().getCurrentSectionOnly())) if (const MCSymbol *Group = Section->getGroup()) { GroupName = Group->getName(); IsComdat = Section->isComdat(); diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index f4684e6..780289e 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -4228,8 +4228,7 @@ bool MasmParser::emitAlignTo(int64_t Alignment) { // Check whether we should use optimal code alignment for this align // directive. const MCSection *Section = getStreamer().getCurrentSectionOnly(); - assert(Section && "must have section to emit alignment"); - if (Section->useCodeAlign()) { + if (MAI.useCodeAlign(*Section)) { getStreamer().emitCodeAlignment(Align(Alignment), &getTargetParser().getSTI(), /*MaxBytesToEmit=*/0); diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp index 1f824b8..d97f4f5 100644 --- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp +++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp @@ -252,7 +252,7 @@ public: if (TypeName == "function") { WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); auto *Current = - cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly()); + static_cast<MCSectionWasm *>(getStreamer().getCurrentSectionOnly()); if (Current->getGroup()) WasmSym->setComdat(true); } else if (TypeName == "global") diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index e738a22..4f28267 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -18,10 +18,9 @@ using namespace llvm; -MCSection::MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss, - MCSymbol *Begin) +MCSection::MCSection(StringRef Name, bool IsText, bool IsBss, MCSymbol *Begin) : Begin(Begin), HasInstructions(false), IsRegistered(false), IsText(IsText), - IsBss(IsBss), LinkerRelaxable(false), Name(Name), Variant(V) { + IsBss(IsBss), LinkerRelaxable(false), Name(Name) { DummyFragment.setParent(this); } diff --git a/llvm/lib/MC/MCSectionCOFF.cpp b/llvm/lib/MC/MCSectionCOFF.cpp deleted file mode 100644 index 5bf1473..0000000 --- a/llvm/lib/MC/MCSectionCOFF.cpp +++ /dev/null @@ -1,117 +0,0 @@ -//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCSectionCOFF.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/raw_ostream.h" -#include <cassert> - -using namespace llvm; - -// shouldOmitSectionDirective - Decides whether a '.section' directive -// should be printed before the section name -bool MCSectionCOFF::shouldOmitSectionDirective(StringRef Name, - const MCAsmInfo &MAI) const { - if (COMDATSymbol || isUnique()) - return false; - - // FIXME: Does .section .bss/.data/.text work everywhere?? - if (Name == ".text" || Name == ".data" || Name == ".bss") - return true; - - return false; -} - -void MCSectionCOFF::setSelection(int Selection) const { - assert(Selection != 0 && "invalid COMDAT selection type"); - this->Selection = Selection; - Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; -} - -void MCSectionCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, - raw_ostream &OS, - uint32_t Subsection) const { - // standard sections don't require the '.section' - if (shouldOmitSectionDirective(getName(), MAI)) { - OS << '\t' << getName() << '\n'; - return; - } - - OS << "\t.section\t" << getName() << ",\""; - if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) - OS << 'd'; - if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) - OS << 'b'; - if (getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE) - OS << 'x'; - if (getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE) - OS << 'w'; - else if (getCharacteristics() & COFF::IMAGE_SCN_MEM_READ) - OS << 'r'; - else - OS << 'y'; - if (getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE) - OS << 'n'; - if (getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED) - OS << 's'; - if ((getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) && - !isImplicitlyDiscardable(getName())) - OS << 'D'; - if (getCharacteristics() & COFF::IMAGE_SCN_LNK_INFO) - OS << 'i'; - OS << '"'; - - // unique should be tail of .section directive. - if (isUnique() && !COMDATSymbol) - OS << ",unique," << UniqueID; - - if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { - if (COMDATSymbol) - OS << ","; - else - OS << "\n\t.linkonce\t"; - switch (Selection) { - case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: - OS << "one_only"; - break; - case COFF::IMAGE_COMDAT_SELECT_ANY: - OS << "discard"; - break; - case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: - OS << "same_size"; - break; - case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: - OS << "same_contents"; - break; - case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE: - OS << "associative"; - break; - case COFF::IMAGE_COMDAT_SELECT_LARGEST: - OS << "largest"; - break; - case COFF::IMAGE_COMDAT_SELECT_NEWEST: - OS << "newest"; - break; - default: - assert(false && "unsupported COFF selection type"); - break; - } - if (COMDATSymbol) { - OS << ","; - COMDATSymbol->print(OS, &MAI); - } - } - - if (isUnique() && COMDATSymbol) - OS << ",unique," << UniqueID; - - OS << '\n'; -} - -bool MCSectionCOFF::useCodeAlign() const { return isText(); } diff --git a/llvm/lib/MC/MCSectionDXContainer.cpp b/llvm/lib/MC/MCSectionDXContainer.cpp deleted file mode 100644 index 7eee59d..0000000 --- a/llvm/lib/MC/MCSectionDXContainer.cpp +++ /dev/null @@ -1,15 +0,0 @@ -//===- lib/MC/MCSectionDXContainer.cpp - DXContainer Section --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCSectionDXContainer.h" - -using namespace llvm; - -void MCSectionDXContainer::printSwitchToSection(const MCAsmInfo &, - const Triple &, raw_ostream &, - uint32_t) const {} diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp deleted file mode 100644 index ef33f9c..0000000 --- a/llvm/lib/MC/MCSectionELF.cpp +++ /dev/null @@ -1,217 +0,0 @@ -//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCSectionELF.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/TargetParser/Triple.h" -#include <cassert> - -using namespace llvm; - -// Decides whether a '.section' directive -// should be printed before the section name. -bool MCSectionELF::shouldOmitSectionDirective(StringRef Name, - const MCAsmInfo &MAI) const { - if (isUnique()) - return false; - - return MAI.shouldOmitSectionDirective(Name); -} - -static void printName(raw_ostream &OS, StringRef Name) { - if (Name.find_first_not_of("0123456789_." - "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) { - OS << Name; - return; - } - OS << '"'; - for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) { - if (*B == '"') // Unquoted " - OS << "\\\""; - else if (*B != '\\') // Neither " or backslash - OS << *B; - else if (B + 1 == E) // Trailing backslash - OS << "\\\\"; - else { - OS << B[0] << B[1]; // Quoted character - ++B; - } - } - OS << '"'; -} - -void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, - raw_ostream &OS, - uint32_t Subsection) const { - if (shouldOmitSectionDirective(getName(), MAI)) { - OS << '\t' << getName(); - if (Subsection) - OS << '\t' << Subsection; - OS << '\n'; - return; - } - - OS << "\t.section\t"; - printName(OS, getName()); - - // Handle the weird solaris syntax if desired. - if (MAI.usesSunStyleELFSectionSwitchSyntax() && - !(Flags & ELF::SHF_MERGE)) { - if (Flags & ELF::SHF_ALLOC) - OS << ",#alloc"; - if (Flags & ELF::SHF_EXECINSTR) - OS << ",#execinstr"; - if (Flags & ELF::SHF_WRITE) - OS << ",#write"; - if (Flags & ELF::SHF_EXCLUDE) - OS << ",#exclude"; - if (Flags & ELF::SHF_TLS) - OS << ",#tls"; - OS << '\n'; - return; - } - - OS << ",\""; - if (Flags & ELF::SHF_ALLOC) - OS << 'a'; - if (Flags & ELF::SHF_EXCLUDE) - OS << 'e'; - if (Flags & ELF::SHF_EXECINSTR) - OS << 'x'; - if (Flags & ELF::SHF_WRITE) - OS << 'w'; - if (Flags & ELF::SHF_MERGE) - OS << 'M'; - if (Flags & ELF::SHF_STRINGS) - OS << 'S'; - if (Flags & ELF::SHF_TLS) - OS << 'T'; - if (Flags & ELF::SHF_LINK_ORDER) - OS << 'o'; - if (Flags & ELF::SHF_GROUP) - OS << 'G'; - if (Flags & ELF::SHF_GNU_RETAIN) - OS << 'R'; - - // If there are os-specific flags, print them. - if (T.isOSSolaris()) - if (Flags & ELF::SHF_SUNW_NODISCARD) - OS << 'R'; - - // If there are target-specific flags, print them. 
- Triple::ArchType Arch = T.getArch(); - if (Arch == Triple::xcore) { - if (Flags & ELF::XCORE_SHF_CP_SECTION) - OS << 'c'; - if (Flags & ELF::XCORE_SHF_DP_SECTION) - OS << 'd'; - } else if (T.isARM() || T.isThumb()) { - if (Flags & ELF::SHF_ARM_PURECODE) - OS << 'y'; - } else if (T.isAArch64()) { - if (Flags & ELF::SHF_AARCH64_PURECODE) - OS << 'y'; - } else if (Arch == Triple::hexagon) { - if (Flags & ELF::SHF_HEX_GPREL) - OS << 's'; - } else if (Arch == Triple::x86_64) { - if (Flags & ELF::SHF_X86_64_LARGE) - OS << 'l'; - } - - OS << '"'; - - OS << ','; - - // If comment string is '@', e.g. as on ARM - use '%' instead - if (MAI.getCommentString()[0] == '@') - OS << '%'; - else - OS << '@'; - - if (Type == ELF::SHT_INIT_ARRAY) - OS << "init_array"; - else if (Type == ELF::SHT_FINI_ARRAY) - OS << "fini_array"; - else if (Type == ELF::SHT_PREINIT_ARRAY) - OS << "preinit_array"; - else if (Type == ELF::SHT_NOBITS) - OS << "nobits"; - else if (Type == ELF::SHT_NOTE) - OS << "note"; - else if (Type == ELF::SHT_PROGBITS) - OS << "progbits"; - else if (Type == ELF::SHT_X86_64_UNWIND) - OS << "unwind"; - else if (Type == ELF::SHT_MIPS_DWARF) - // Print hex value of the flag while we do not have - // any standard symbolic representation of the flag. - OS << "0x7000001e"; - else if (Type == ELF::SHT_LLVM_ODRTAB) - OS << "llvm_odrtab"; - else if (Type == ELF::SHT_LLVM_LINKER_OPTIONS) - OS << "llvm_linker_options"; - else if (Type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE) - OS << "llvm_call_graph_profile"; - else if (Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES) - OS << "llvm_dependent_libraries"; - else if (Type == ELF::SHT_LLVM_SYMPART) - OS << "llvm_sympart"; - else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP) - OS << "llvm_bb_addr_map"; - else if (Type == ELF::SHT_LLVM_OFFLOADING) - OS << "llvm_offloading"; - else if (Type == ELF::SHT_LLVM_LTO) - OS << "llvm_lto"; - else if (Type == ELF::SHT_LLVM_JT_SIZES) - OS << "llvm_jt_sizes"; - else if (Type == ELF::SHT_LLVM_CFI_JUMP_TABLE) - OS << "llvm_cfi_jump_table"; - else - OS << "0x" << Twine::utohexstr(Type); - - if (EntrySize) { - assert((Flags & ELF::SHF_MERGE) || Type == ELF::SHT_LLVM_CFI_JUMP_TABLE); - OS << "," << EntrySize; - } - - if (Flags & ELF::SHF_LINK_ORDER) { - OS << ","; - if (LinkedToSym) - printName(OS, LinkedToSym->getName()); - else - OS << '0'; - } - - if (Flags & ELF::SHF_GROUP) { - OS << ","; - printName(OS, Group.getPointer()->getName()); - if (isComdat()) - OS << ",comdat"; - } - - if (isUnique()) - OS << ",unique," << UniqueID; - - OS << '\n'; - - if (Subsection) { - OS << "\t.subsection\t" << Subsection; - OS << '\n'; - } -} - -bool MCSectionELF::useCodeAlign() const { - return getFlags() & ELF::SHF_EXECINSTR; -} diff --git a/llvm/lib/MC/MCSectionGOFF.cpp b/llvm/lib/MC/MCSectionGOFF.cpp deleted file mode 100644 index 8163e5b..0000000 --- a/llvm/lib/MC/MCSectionGOFF.cpp +++ /dev/null @@ -1,143 +0,0 @@ -//===- MCSectionGOFF.cpp - GOFF Code Section Representation ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCSectionGOFF.h" -#include "llvm/BinaryFormat/GOFF.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static void emitCATTR(raw_ostream &OS, StringRef Name, GOFF::ESDRmode Rmode, - GOFF::ESDAlignment Alignment, - GOFF::ESDLoadingBehavior LoadBehavior, - GOFF::ESDExecutable Executable, bool IsReadOnly, - uint32_t SortKey, uint8_t FillByteValue, - StringRef PartName) { - OS << Name << " CATTR "; - OS << "ALIGN(" << static_cast<unsigned>(Alignment) << ")," - << "FILL(" << static_cast<unsigned>(FillByteValue) << ")"; - switch (LoadBehavior) { - case GOFF::ESD_LB_Deferred: - OS << ",DEFLOAD"; - break; - case GOFF::ESD_LB_NoLoad: - OS << ",NOLOAD"; - break; - default: - break; - } - switch (Executable) { - case GOFF::ESD_EXE_CODE: - OS << ",EXECUTABLE"; - break; - case GOFF::ESD_EXE_DATA: - OS << ",NOTEXECUTABLE"; - break; - default: - break; - } - if (IsReadOnly) - OS << ",READONLY"; - if (Rmode != GOFF::ESD_RMODE_None) { - OS << ','; - OS << "RMODE("; - switch (Rmode) { - case GOFF::ESD_RMODE_24: - OS << "24"; - break; - case GOFF::ESD_RMODE_31: - OS << "31"; - break; - case GOFF::ESD_RMODE_64: - OS << "64"; - break; - case GOFF::ESD_RMODE_None: - break; - } - OS << ')'; - } - if (SortKey) - OS << ",PRIORITY(" << SortKey << ")"; - if (!PartName.empty()) - OS << ",PART(" << PartName << ")"; - OS << '\n'; -} - -static void emitXATTR(raw_ostream &OS, StringRef Name, - GOFF::ESDLinkageType Linkage, - GOFF::ESDExecutable Executable, - GOFF::ESDBindingScope BindingScope) { - OS << Name << " XATTR "; - OS << "LINKAGE(" << (Linkage == GOFF::ESD_LT_OS ? "OS" : "XPLINK") << "),"; - if (Executable != GOFF::ESD_EXE_Unspecified) - OS << "REFERENCE(" << (Executable == GOFF::ESD_EXE_CODE ? 
"CODE" : "DATA") - << "),"; - if (BindingScope != GOFF::ESD_BSC_Unspecified) { - OS << "SCOPE("; - switch (BindingScope) { - case GOFF::ESD_BSC_Section: - OS << "SECTION"; - break; - case GOFF::ESD_BSC_Module: - OS << "MODULE"; - break; - case GOFF::ESD_BSC_Library: - OS << "LIBRARY"; - break; - case GOFF::ESD_BSC_ImportExport: - OS << "EXPORT"; - break; - default: - break; - } - OS << ')'; - } - OS << '\n'; -} - -void MCSectionGOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, - raw_ostream &OS, - uint32_t Subsection) const { - switch (SymbolType) { - case GOFF::ESD_ST_SectionDefinition: { - OS << Name << " CSECT\n"; - Emitted = true; - break; - } - case GOFF::ESD_ST_ElementDefinition: { - getParent()->printSwitchToSection(MAI, T, OS, Subsection); - if (!Emitted) { - emitCATTR(OS, Name, EDAttributes.Rmode, EDAttributes.Alignment, - EDAttributes.LoadBehavior, GOFF::ESD_EXE_Unspecified, - EDAttributes.IsReadOnly, 0, EDAttributes.FillByteValue, - StringRef()); - Emitted = true; - } else - OS << Name << " CATTR\n"; - break; - } - case GOFF::ESD_ST_PartReference: { - MCSectionGOFF *ED = getParent(); - ED->getParent()->printSwitchToSection(MAI, T, OS, Subsection); - if (!Emitted) { - emitCATTR(OS, ED->getName(), ED->getEDAttributes().Rmode, - ED->EDAttributes.Alignment, ED->EDAttributes.LoadBehavior, - PRAttributes.Executable, ED->EDAttributes.IsReadOnly, - PRAttributes.SortKey, ED->EDAttributes.FillByteValue, Name); - emitXATTR(OS, Name, PRAttributes.Linkage, PRAttributes.Executable, - PRAttributes.BindingScope); - ED->Emitted = true; - Emitted = true; - } else - OS << ED->getName() << " CATTR PART(" << Name << ")\n"; - break; - } - default: - llvm_unreachable("Wrong section type"); - } -}
\ No newline at end of file diff --git a/llvm/lib/MC/MCSectionMachO.cpp b/llvm/lib/MC/MCSectionMachO.cpp index 67453ce..67c8235 100644 --- a/llvm/lib/MC/MCSectionMachO.cpp +++ b/llvm/lib/MC/MCSectionMachO.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/raw_ostream.h" @@ -92,7 +93,7 @@ ENTRY("" /*FIXME*/, S_ATTR_LOC_RELOC) MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, unsigned TAA, unsigned reserved2, SectionKind K, MCSymbol *Begin) - : MCSection(SV_MachO, Section, K.isText(), + : MCSection(Section, K.isText(), MachO::isVirtualSection(TAA & MachO::SECTION_TYPE), Begin), TypeAndAttributes(TAA), Reserved2(reserved2) { assert(Segment.size() <= 16 && Section.size() <= 16 && @@ -105,19 +106,20 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, } } -void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, - raw_ostream &OS, - uint32_t Subsection) const { - OS << "\t.section\t" << getSegmentName() << ',' << getName(); +void MCAsmInfoDarwin::printSwitchToSection(const MCSection &Section, uint32_t, + const Triple &T, + raw_ostream &OS) const { + auto &Sec = static_cast<const MCSectionMachO &>(Section); + OS << "\t.section\t" << Sec.getSegmentName() << ',' << Sec.getName(); // Get the section type and attributes. - unsigned TAA = getTypeAndAttributes(); + unsigned TAA = Sec.getTypeAndAttributes(); if (TAA == 0) { OS << '\n'; return; } - MachO::SectionType SectionType = getType(); + MachO::SectionType SectionType = Sec.getType(); assert(SectionType <= MachO::LAST_KNOWN_SECTION_TYPE && "Invalid SectionType specified!"); @@ -135,8 +137,8 @@ void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, if (SectionAttrs == 0) { // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as // the attribute specifier. - if (Reserved2 != 0) - OS << ",none," << Reserved2; + if (Sec.Reserved2 != 0) + OS << ",none," << Sec.Reserved2; OS << '\n'; return; } @@ -164,15 +166,11 @@ void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, assert(SectionAttrs == 0 && "Unknown section attributes!"); // If we have a S_SYMBOL_STUBS size specified, print it. - if (Reserved2 != 0) - OS << ',' << Reserved2; + if (Sec.Reserved2 != 0) + OS << ',' << Sec.Reserved2; OS << '\n'; } -bool MCSectionMachO::useCodeAlign() const { - return hasAttribute(MachO::S_ATTR_PURE_INSTRUCTIONS); -} - /// ParseSectionSpecifier - Parse the section specifier indicated by "Spec". /// This is a string that can appear after a .section directive in a mach-o /// flavored .s file. If successful, this fills in the specified Out diff --git a/llvm/lib/MC/MCSectionWasm.cpp b/llvm/lib/MC/MCSectionWasm.cpp deleted file mode 100644 index e25af1c..0000000 --- a/llvm/lib/MC/MCSectionWasm.cpp +++ /dev/null @@ -1,101 +0,0 @@ -//===- lib/MC/MCSectionWasm.cpp - Wasm Code Section Representation --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCSectionWasm.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSymbolWasm.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -// Decides whether a '.section' directive -// should be printed before the section name. -bool MCSectionWasm::shouldOmitSectionDirective(StringRef Name, - const MCAsmInfo &MAI) const { - return MAI.shouldOmitSectionDirective(Name); -} - -static void printName(raw_ostream &OS, StringRef Name) { - if (Name.find_first_not_of("0123456789_." - "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) { - OS << Name; - return; - } - OS << '"'; - for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) { - if (*B == '"') // Unquoted " - OS << "\\\""; - else if (*B != '\\') // Neither " or backslash - OS << *B; - else if (B + 1 == E) // Trailing backslash - OS << "\\\\"; - else { - OS << B[0] << B[1]; // Quoted character - ++B; - } - } - OS << '"'; -} - -void MCSectionWasm::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, - raw_ostream &OS, - uint32_t Subsection) const { - - if (shouldOmitSectionDirective(getName(), MAI)) { - OS << '\t' << getName(); - if (Subsection) - OS << '\t' << Subsection; - OS << '\n'; - return; - } - - OS << "\t.section\t"; - printName(OS, getName()); - OS << ",\""; - - if (IsPassive) - OS << 'p'; - if (Group) - OS << 'G'; - if (SegmentFlags & wasm::WASM_SEG_FLAG_STRINGS) - OS << 'S'; - if (SegmentFlags & wasm::WASM_SEG_FLAG_TLS) - OS << 'T'; - if (SegmentFlags & wasm::WASM_SEG_FLAG_RETAIN) - OS << 'R'; - - OS << '"'; - - OS << ','; - - // If comment string is '@', e.g. as on ARM - use '%' instead - if (MAI.getCommentString()[0] == '@') - OS << '%'; - else - OS << '@'; - - // TODO: Print section type. - - if (Group) { - OS << ","; - printName(OS, Group->getName()); - OS << ",comdat"; - } - - if (isUnique()) - OS << ",unique," << UniqueID; - - OS << '\n'; - - if (Subsection) - OS << "\t.subsection\t" << Subsection << '\n'; -} - -bool MCSectionWasm::useCodeAlign() const { return false; } diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp deleted file mode 100644 index 41043b2..0000000 --- a/llvm/lib/MC/MCSectionXCOFF.cpp +++ /dev/null @@ -1,134 +0,0 @@ -//===- lib/MC/MCSectionXCOFF.cpp - XCOFF Code Section Representation ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCSectionXCOFF.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -namespace llvm { -class MCExpr; -class Triple; -} // namespace llvm - -using namespace llvm; - -MCSectionXCOFF::~MCSectionXCOFF() = default; - -void MCSectionXCOFF::printCsectDirective(raw_ostream &OS) const { - OS << "\t.csect " << QualName->getName() << "," << Log2(getAlign()) << '\n'; -} - -void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, - raw_ostream &OS, - uint32_t Subsection) const { - if (getKind().isText()) { - if (getMappingClass() != XCOFF::XMC_PR) - report_fatal_error("Unhandled storage-mapping class for .text csect"); - - printCsectDirective(OS); - return; - } - - if (getKind().isReadOnly()) { - if (getMappingClass() != XCOFF::XMC_RO && - getMappingClass() != XCOFF::XMC_TD) - report_fatal_error("Unhandled storage-mapping class for .rodata csect."); - printCsectDirective(OS); - return; - } - - if (getKind().isReadOnlyWithRel()) { - if (getMappingClass() != XCOFF::XMC_RW && - getMappingClass() != XCOFF::XMC_RO && - getMappingClass() != XCOFF::XMC_TD) - report_fatal_error( - "Unexepected storage-mapping class for ReadOnlyWithRel kind"); - printCsectDirective(OS); - return; - } - - // Initialized TLS data. - if (getKind().isThreadData()) { - // We only expect XMC_TL here for initialized TLS data. - if (getMappingClass() != XCOFF::XMC_TL) - report_fatal_error("Unhandled storage-mapping class for .tdata csect."); - printCsectDirective(OS); - return; - } - - if (getKind().isData()) { - switch (getMappingClass()) { - case XCOFF::XMC_RW: - case XCOFF::XMC_DS: - case XCOFF::XMC_TD: - printCsectDirective(OS); - break; - case XCOFF::XMC_TC: - case XCOFF::XMC_TE: - break; - case XCOFF::XMC_TC0: - OS << "\t.toc\n"; - break; - default: - report_fatal_error( - "Unhandled storage-mapping class for .data csect."); - } - return; - } - - if (isCsect() && getMappingClass() == XCOFF::XMC_TD) { - // Common csect type (uninitialized storage) does not have to print csect - // directive for section switching unless it is local. - if (getKind().isCommon() && !getKind().isBSSLocal()) - return; - - assert(getKind().isBSS() && "Unexpected section kind for toc-data"); - printCsectDirective(OS); - return; - } - // Common csect type (uninitialized storage) does not have to print csect - // directive for section switching. - if (isCsect() && getCSectType() == XCOFF::XTY_CM) { - assert((getMappingClass() == XCOFF::XMC_RW || - getMappingClass() == XCOFF::XMC_BS || - getMappingClass() == XCOFF::XMC_UL) && - "Generated a storage-mapping class for a common/bss/tbss csect we " - "don't " - "understand how to switch to."); - // Common symbols and local zero-initialized symbols for TLS and Non-TLS are - // eligible for .bss/.tbss csect, getKind().isThreadBSS() is used to cover - // TLS common and zero-initialized local symbols since linkage type (in the - // GlobalVariable) is not accessible in this class. - assert((getKind().isBSSLocal() || getKind().isCommon() || - getKind().isThreadBSS()) && - "wrong symbol type for .bss/.tbss csect"); - // Don't have to print a directive for switching to section for commons and - // zero-initialized TLS data. The '.comm' and '.lcomm' directives of the - // variable will create the needed csect. 
- return; - } - - // Zero-initialized TLS data with weak or external linkage are not eligible to - // be put into common csect. - if (getKind().isThreadBSS()) { - printCsectDirective(OS); - return; - } - - // XCOFF debug sections. - if (getKind().isMetadata() && isDwarfSect()) { - OS << "\n\t.dwsect " << format("0x%" PRIx32, *getDwarfSubtypeFlags()) - << '\n'; - OS << getName() << ':' << '\n'; - return; - } - - report_fatal_error("Printing for this SectionKind is unimplemented."); -} - -bool MCSectionXCOFF::useCodeAlign() const { return getKind().isText(); } diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 899a7df..bc73981 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -56,12 +56,11 @@ void MCTargetStreamer::finish() {} void MCTargetStreamer::emitConstantPools() {} -void MCTargetStreamer::changeSection(const MCSection *CurSection, - MCSection *Section, uint32_t Subsection, - raw_ostream &OS) { - Section->printSwitchToSection(*Streamer.getContext().getAsmInfo(), - Streamer.getContext().getTargetTriple(), OS, - Subsection); +void MCTargetStreamer::changeSection(const MCSection *, MCSection *Sec, + uint32_t Subsection, raw_ostream &OS) { + auto &MAI = *Streamer.getContext().getAsmInfo(); + MAI.printSwitchToSection(*Sec, Subsection, + Streamer.getContext().getTargetTriple(), OS); } void MCTargetStreamer::emitDwarfFileDirective(StringRef Directive) { @@ -838,8 +837,8 @@ static MCSection *getWinCFISection(MCContext &Context, unsigned *NextWinCFIID, if (TextSec == Context.getObjectFileInfo()->getTextSection()) return MainCFISec; - const auto *TextSecCOFF = cast<MCSectionCOFF>(TextSec); - auto *MainCFISecCOFF = cast<MCSectionCOFF>(MainCFISec); + const auto *TextSecCOFF = static_cast<const MCSectionCOFF *>(TextSec); + auto *MainCFISecCOFF = static_cast<MCSectionCOFF *>(MainCFISec); unsigned UniqueID = TextSecCOFF->getOrAssignWinCFISectionID(NextWinCFIID); // If this section is COMDAT, this unwind section should be COMDAT associative @@ -1314,11 +1313,20 @@ void MCStreamer::emitZerofill(MCSection *, MCSymbol *, uint64_t, Align, SMLoc) { } void MCStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, Align ByteAlignment) {} + void MCStreamer::changeSection(MCSection *Sec, uint32_t) { CurFrag = &Sec->getDummyFragment(); - if (auto *Sym = Sec->getBeginSymbol()) - Sym->setFragment(&Sec->getDummyFragment()); + auto *Sym = Sec->getBeginSymbol(); + if (!Sym || !Sym->isUndefined()) + return; + // In Mach-O, DWARF sections use Begin as a temporary label, requiring a label + // definition, unlike section symbols in other file formats. 
+ if (getContext().getObjectFileType() == MCContext::IsMachO) + emitLabel(Sym); + else + Sym->setFragment(CurFrag); } + void MCStreamer::emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {} void MCStreamer::emitBytes(StringRef Data) {} void MCStreamer::emitBinaryData(StringRef Data) { emitBytes(Data); } diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp index 5891420c..e3ef111 100644 --- a/llvm/lib/MC/MCWasmStreamer.cpp +++ b/llvm/lib/MC/MCWasmStreamer.cpp @@ -58,7 +58,7 @@ void MCWasmStreamer::emitLabelAtPos(MCSymbol *S, SMLoc Loc, MCFragment &F, void MCWasmStreamer::changeSection(MCSection *Section, uint32_t Subsection) { MCAssembler &Asm = getAssembler(); - auto *SectionWasm = cast<MCSectionWasm>(Section); + auto *SectionWasm = static_cast<const MCSectionWasm *>(Section); const MCSymbol *Grp = SectionWasm->getGroup(); if (Grp) Asm.registerSymbol(*Grp); diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp index 9369bea..1ffe25c 100644 --- a/llvm/lib/MC/MCWinCOFFStreamer.cpp +++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp @@ -157,7 +157,8 @@ void MCWinCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) { // Ensure that the first and the second symbols relative to the section are // the section symbol and the COMDAT symbol. getAssembler().registerSymbol(*Section->getBeginSymbol()); - if (auto *Sym = cast<MCSectionCOFF>(Section)->getCOMDATSymbol()) + if (auto *Sym = + static_cast<const MCSectionCOFF *>(Section)->getCOMDATSymbol()) getAssembler().registerSymbol(*Sym); } diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 78e4b95..898ac5d 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -38,7 +38,7 @@ XCOFFObjectWriter &MCXCOFFStreamer::getWriter() { void MCXCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) { MCObjectStreamer::changeSection(Section, Subsection); - auto *Sec = cast<MCSectionXCOFF>(Section); + auto *Sec = static_cast<const MCSectionXCOFF *>(Section); // We might miss calculating the symbols difference as absolute value before // adding fixups when symbol_A without the fragment set is the csect itself // and symbol_B is in it. diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index 48d2fc6..7b5c3c0 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -126,7 +126,8 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S) const { uint64_t MachObjectWriter::getPaddingSize(const MCAssembler &Asm, const MCSection *Sec) const { uint64_t EndAddr = getSectionAddress(Sec) + Asm.getSectionAddressSize(*Sec); - unsigned Next = cast<MCSectionMachO>(Sec)->getLayoutOrder() + 1; + unsigned Next = + static_cast<const MCSectionMachO *>(Sec)->getLayoutOrder() + 1; if (Next >= SectionOrder.size()) return 0; @@ -259,15 +260,12 @@ void MachObjectWriter::writeSegmentLoadCommand( } void MachObjectWriter::writeSection(const MCAssembler &Asm, - const MCSection &Sec, uint64_t VMAddr, + const MCSectionMachO &Sec, uint64_t VMAddr, uint64_t FileOffset, unsigned Flags, uint64_t RelocationsStart, unsigned NumRelocations) { - uint64_t SectionSize = Asm.getSectionAddressSize(Sec); - const MCSectionMachO &Section = cast<MCSectionMachO>(Sec); - // The offset is unused for virtual sections. 
- if (Section.isBssSection()) { + if (Sec.isBssSection()) { assert(Asm.getSectionFileSize(Sec) == 0 && "Invalid file size!"); FileOffset = 0; } @@ -275,11 +273,11 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm, // struct section (68 bytes) or // struct section_64 (80 bytes) + uint64_t SectionSize = Asm.getSectionAddressSize(Sec); uint64_t Start = W.OS.tell(); (void) Start; - - writeWithPadding(Section.getName(), 16); - writeWithPadding(Section.getSegmentName(), 16); + writeWithPadding(Sec.getName(), 16); + writeWithPadding(Sec.getSegmentName(), 16); if (is64Bit()) { W.write<uint64_t>(VMAddr); // address W.write<uint64_t>(SectionSize); // size @@ -290,14 +288,14 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm, assert(isUInt<32>(FileOffset) && "Cannot encode offset of section"); W.write<uint32_t>(FileOffset); - W.write<uint32_t>(Log2(Section.getAlign())); + W.write<uint32_t>(Log2(Sec.getAlign())); assert((!NumRelocations || isUInt<32>(RelocationsStart)) && "Cannot encode offset of relocations"); W.write<uint32_t>(NumRelocations ? RelocationsStart : 0); W.write<uint32_t>(NumRelocations); W.write<uint32_t>(Flags); W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1 - W.write<uint32_t>(Section.getStubSize()); // reserved2 + W.write<uint32_t>(Sec.getStubSize()); // reserved2 if (is64Bit()) W.write<uint32_t>(0); // reserved3 @@ -531,7 +529,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { // Report errors for use of .indirect_symbol not in a symbol pointer section // or stub section. for (IndirectSymbolData &ISD : IndirectSymbols) { - const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section); + const MCSectionMachO &Section = static_cast<MCSectionMachO &>(*ISD.Section); if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && @@ -545,7 +543,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { // Bind non-lazy symbol pointers first. for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) { - const auto &Section = cast<MCSectionMachO>(*ISD.Section); + const auto &Section = static_cast<MCSectionMachO &>(*ISD.Section); if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS) @@ -559,7 +557,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { // Then lazy symbol pointers and symbol stubs. for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) { - const auto &Section = cast<MCSectionMachO>(*ISD.Section); + const auto &Section = static_cast<MCSectionMachO &>(*ISD.Section); if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && Section.getType() != MachO::S_SYMBOL_STUBS) @@ -684,13 +682,13 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm) { for (MCSection &Sec : Asm) { if (!Sec.isBssSection()) { SectionOrder.push_back(&Sec); - cast<MCSectionMachO>(Sec).setLayoutOrder(i++); + static_cast<MCSectionMachO &>(Sec).setLayoutOrder(i++); } } for (MCSection &Sec : Asm) { if (Sec.isBssSection()) { SectionOrder.push_back(&Sec); - cast<MCSectionMachO>(Sec).setLayoutOrder(i++); + static_cast<MCSectionMachO &>(Sec).setLayoutOrder(i++); } } @@ -907,7 +905,7 @@ uint64_t MachObjectWriter::writeObject() { // ... and then the section headers. 
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; for (const MCSection &Section : Asm) { - const auto &Sec = cast<MCSectionMachO>(Section); + const auto &Sec = static_cast<const MCSectionMachO &>(Section); std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; unsigned NumRelocs = Relocs.size(); uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec); diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 3b99af4..bfd6334 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -480,7 +480,7 @@ void WasmObjectWriter::recordRelocation(const MCFragment &F, // The WebAssembly backend should never generate FKF_IsPCRel fixups assert(!Fixup.isPCRel()); - const auto &FixupSection = cast<MCSectionWasm>(*F.getParent()); + const auto &FixupSection = static_cast<MCSectionWasm &>(*F.getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Asm->getFragmentOffset(F) + Fixup.getOffset(); MCContext &Ctx = getContext(); diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index 6ad4334..856850d 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -373,7 +373,7 @@ void WinCOFFWriter::defineSymbol(const MCSymbol &MCSym) { COFFSection *Sec = nullptr; MCSectionCOFF *MCSec = nullptr; if (Base && Base->getFragment()) { - MCSec = cast<MCSectionCOFF>(Base->getFragment()->getParent()); + MCSec = static_cast<MCSectionCOFF *>(Base->getFragment()->getParent()); Sec = SectionMap[MCSec]; } @@ -1057,7 +1057,8 @@ uint64_t WinCOFFWriter::writeObject() { continue; } - const auto *AssocMCSec = cast<MCSectionCOFF>(&AssocMCSym->getSection()); + const auto *AssocMCSec = + static_cast<const MCSectionCOFF *>(&AssocMCSym->getSection()); assert(SectionMap.count(AssocMCSec)); COFFSection *AssocSec = SectionMap[AssocMCSec]; diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index 2f6785f..65f543b 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -550,13 +550,13 @@ CsectGroup &XCOFFWriter::getCsectGroup(const MCSectionXCOFF *MCSec) { static MCSectionXCOFF *getContainingCsect(const MCSymbolXCOFF *XSym) { if (XSym->isDefined()) - return cast<MCSectionXCOFF>(XSym->getFragment()->getParent()); + return static_cast<MCSectionXCOFF *>(XSym->getFragment()->getParent()); return XSym->getRepresentedCsect(); } void XCOFFWriter::executePostLayoutBinding() { for (const auto &S : *Asm) { - const auto *MCSec = cast<const MCSectionXCOFF>(&S); + auto *MCSec = static_cast<const MCSectionXCOFF *>(&S); assert(!SectionMap.contains(MCSec) && "Cannot add a section twice."); // If the name does not fit in the storage provided in the symbol table @@ -747,7 +747,7 @@ void XCOFFWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup, FixedValue = TOCEntryOffset; } } else if (Type == XCOFF::RelocationType::R_RBR) { - MCSectionXCOFF *ParentSec = cast<MCSectionXCOFF>(F.getParent()); + auto *ParentSec = static_cast<MCSectionXCOFF *>(F.getParent()); assert((SymASec->getMappingClass() == XCOFF::XMC_PR && ParentSec->getMappingClass() == XCOFF::XMC_PR) && "Only XMC_PR csect may have the R_RBR relocation."); @@ -768,7 +768,7 @@ void XCOFFWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup, } XCOFFRelocation Reloc = {Index, FixupOffsetInCsect, SignAndSize, Type}; - MCSectionXCOFF *RelocationSec = cast<MCSectionXCOFF>(F.getParent()); + auto *RelocationSec = static_cast<MCSectionXCOFF 
*>(F.getParent()); assert(SectionMap.contains(RelocationSec) && "Expected containing csect to exist in map."); SectionMap[RelocationSec]->Relocations.push_back(Reloc); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index bb7ccdb..fd89583 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -119,7 +119,6 @@ MODULE_PASS("module-inline", ModuleInlinerPass()) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("nsan", NumericalStabilitySanitizerPass()) -MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 2d91afb..8d4e043 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -79,28 +79,13 @@ unsigned SpecialCaseList::Matcher::match(StringRef Query) const { // TODO: Refactor this to return Expected<...> std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const std::vector<std::string> &Paths, - llvm::vfs::FileSystem &FS, - std::pair<unsigned, std::string> &Error) { + llvm::vfs::FileSystem &FS, std::string &Error) { std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList()); if (SCL->createInternal(Paths, FS, Error)) return SCL; return nullptr; } -std::unique_ptr<SpecialCaseList> -SpecialCaseList::create(const std::vector<std::string> &Paths, - llvm::vfs::FileSystem &FS, std::string &Error) { - std::pair<unsigned, std::string> Err; - std::unique_ptr<SpecialCaseList> SCL = create(Paths, FS, Err); - if (!SCL) { - assert(Err.first == 0 || Err.first == 1 && "Unexpected error kind"); - const char *Prefix = - Err.first == 0 ? 
"can't open file " : "error parsing file "; - Error = (Twine(Prefix) + Err.second).str(); - } - return SCL; -} - std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB, std::string &Error) { std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList()); @@ -112,28 +97,25 @@ std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB, std::unique_ptr<SpecialCaseList> SpecialCaseList::createOrDie(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS) { - std::pair<unsigned, std::string> Error; + std::string Error; if (auto SCL = create(Paths, FS, Error)) return SCL; - report_fatal_error(Twine(Error.second)); + report_fatal_error(Twine(Error)); } bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths, - vfs::FileSystem &VFS, - std::pair<unsigned, std::string> &Error) { + vfs::FileSystem &VFS, std::string &Error) { for (size_t i = 0; i < Paths.size(); ++i) { const auto &Path = Paths[i]; ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = VFS.getBufferForFile(Path); if (std::error_code EC = FileOrErr.getError()) { - Error.first = 0 /* open failure */; - Error.second = (Twine("'") + Path + "': " + EC.message()).str(); + Error = (Twine("can't open file '") + Path + "': " + EC.message()).str(); return false; } std::string ParseError; if (!parse(i, FileOrErr.get().get(), ParseError)) { - Error.first = 1 /* parse failure */; - Error.second = (Twine("'") + Path + "': " + ParseError).str(); + Error = (Twine("error parsing file '") + Path + "': " + ParseError).str(); return false; } } diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp index c218831..85de2d5 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -36,7 +36,7 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, // SHF_AARCH64_PURECODE flag set if the "+execute-only" target feature is // present. if (TM.getMCSubtargetInfo()->hasFeature(AArch64::FeatureExecuteOnly)) { - auto *Text = cast<MCSectionELF>(TextSection); + auto *Text = static_cast<MCSectionELF *>(TextSection); Text->setFlags(Text->getFlags() | ELF::SHF_AARCH64_PURECODE); } } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 08f547a..6257e99 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -523,7 +523,8 @@ void AArch64TargetELFStreamer::finish() { // mark it execute-only if it is empty and there is at least one // execute-only section in the object. if (any_of(Asm, [](const MCSection &Sec) { - return cast<MCSectionELF>(Sec).getFlags() & ELF::SHF_AARCH64_PURECODE; + return static_cast<const MCSectionELF &>(Sec).getFlags() & + ELF::SHF_AARCH64_PURECODE; })) { auto *Text = static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection()); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 1ac340a..a22a17a 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -132,7 +132,8 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section, // But only if they don't point to a few forbidden sections. 
if (!Symbol.isInSection()) return true; - const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection()); + const MCSectionMachO &RefSec = + static_cast<MCSectionMachO &>(Symbol.getSection()); if (RefSec.getType() == MachO::S_CSTRING_LITERALS) return false; diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 7d6723a..334afd3 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -38,7 +38,11 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1, unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC, const SIRegisterInfo *STI) { - return STI->isSGPRClass(RC) ? SGPR : (STI->isAGPRClass(RC) ? AGPR : VGPR); + return STI->isSGPRClass(RC) + ? SGPR + : (STI->isAGPRClass(RC) + ? AGPR + : (STI->isVectorSuperClass(RC) ? AVGPR : VGPR)); } void GCNRegPressure::inc(unsigned Reg, diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 3749b6d..ea33a22 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -29,43 +29,57 @@ class raw_ostream; class SlotIndex; struct GCNRegPressure { - enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS }; + enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS }; GCNRegPressure() { clear(); } - bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; } + bool empty() const { + return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR]; + } void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); } /// \returns the SGPR32 pressure unsigned getSGPRNum() const { return Value[SGPR]; } - /// \returns the aggregated ArchVGPR32, AccVGPR32 pressure dependent upon \p - /// UnifiedVGPRFile + /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure + /// dependent upon \p UnifiedVGPRFile unsigned getVGPRNum(bool UnifiedVGPRFile) const { if (UnifiedVGPRFile) { - return Value[AGPR] ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR]) - : Value[VGPR]; + return Value[AGPR] + ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR]) + : Value[VGPR] + Value[AVGPR]; } - return std::max(Value[VGPR], Value[AGPR]); + // AVGPR assignment priority is based on the width of the register. Account + // AVGPR pressure as VGPR. + return std::max(Value[VGPR] + Value[AVGPR], Value[AGPR]); } /// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs - /// and \p NumAGPRs AGPRS, for a target with a unified VGPR file. + /// \p NumAGPRs AGPRS, and \p NumAVGPRs AVGPRs for a target with a unified + /// VGPR file. inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs, - unsigned NumAGPRs) { - return alignTo(NumArchVGPRs, AMDGPU::IsaInfo::getArchVGPRAllocGranule()) + + unsigned NumAGPRs, + unsigned NumAVGPRs) { + + // Assume AVGPRs will be assigned as VGPRs. 
+ return alignTo(NumArchVGPRs + NumAVGPRs, + AMDGPU::IsaInfo::getArchVGPRAllocGranule()) + NumAGPRs; } - /// \returns the ArchVGPR32 pressure - unsigned getArchVGPRNum() const { return Value[VGPR]; } + /// \returns the ArchVGPR32 pressure, plus the AVGPRS which we assume will be + /// allocated as VGPR + unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; } /// \returns the AccVGPR32 pressure unsigned getAGPRNum() const { return Value[AGPR]; } + /// \returns the AVGPR32 pressure + unsigned getAVGPRNum() const { return Value[AVGPR]; } unsigned getVGPRTuplesWeight() const { - return std::max(Value[TOTAL_KINDS + VGPR], Value[TOTAL_KINDS + AGPR]); + return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR], + Value[TOTAL_KINDS + AGPR]); } unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; } diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index e5d1eaa..b77da4d 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1062,9 +1062,13 @@ bool SIFoldOperandsImpl::tryFoldRegSeqSplat( switch (OpTy) { case AMDGPU::OPERAND_REG_INLINE_AC_INT32: case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0); break; case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1); break; default: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0eee7ad..8d51ec6 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -14179,6 +14179,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N, Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) && (VT == MVT::f32 || VT == MVT::f64 || (VT == MVT::f16 && Subtarget->has16BitInsts()) || + (VT == MVT::bf16 && Subtarget->hasBF16PackedInsts()) || + (VT == MVT::v2bf16 && Subtarget->hasBF16PackedInsts()) || (VT == MVT::v2f16 && Subtarget->hasVOP3PInsts())) && Op0.hasOneUse()) { if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1)) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d04aafb..8d6c1d0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2508,7 +2508,20 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { .addReg(DstHi); } break; + + case AMDGPU::V_MAX_BF16_PSEUDO_e64: + assert(ST.hasBF16PackedInsts()); + MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16)); + MI.addOperand(MachineOperand::CreateImm(0)); // op_sel + MI.addOperand(MachineOperand::CreateImm(0)); // neg_lo + MI.addOperand(MachineOperand::CreateImm(0)); // neg_hi + auto Op0 = getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); + Op0->setImm(Op0->getImm() | SISrcMods::OP_SEL_1); + auto Op1 = getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); + Op1->setImm(Op1->getImm() | SISrcMods::OP_SEL_1); + break; } + return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index b0be3f86..83b0490 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2865,6 +2865,7 @@ def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>; def VOP_I16_I16_I16 : VOPProfile 
<[i16, i16, i16, untyped]>; def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>; +def VOP_BF16_BF16_BF16 : VOPProfile <[bf16, bf16, bf16, untyped]>; def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index d05be8f..54fa192 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1894,6 +1894,9 @@ let SubtargetPredicate = UseRealTrue16Insts in def : ClampPat<V_MAX_F16_t16_e64, f16>; let SubtargetPredicate = UseFakeTrue16Insts in def : ClampPat<V_MAX_F16_fake16_e64, f16>; +// FIXME-TRUE16: Pseudo expansion of this won't work with True16. +let True16Predicate = UseFakeTrue16Insts in +def : ClampPat<V_MAX_BF16_PSEUDO_e64, bf16>; let SubtargetPredicate = HasVOP3PInsts in { def : GCNPat < @@ -1903,6 +1906,13 @@ def : GCNPat < >; } +let SubtargetPredicate = HasBF16PackedInsts in { +def : GCNPat < + (v2bf16 (AMDGPUclamp (VOP3PMods v2bf16:$src0, i32:$src0_modifiers))), + (V_PK_MAX_NUM_BF16 $src0_modifiers, $src0, + $src0_modifiers, $src0, DSTCLAMP.ENABLE) +>; +} // End SubtargetPredicate = HasBF16PackedInsts /********** ================================ **********/ /********** Floating point absolute/negative **********/ diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 0039d2f..218841d 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -109,6 +109,23 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList> let TSFlags{2} = HasVGPR; let TSFlags{3} = HasAGPR; let TSFlags{4} = HasSGPR; + + // RA will use RegisterClass AllocationPriority amongst other info (e.g. ordering in the basic block) + // to decide which registers to try to assign first. Usually, this RegisterClass priority is given + // very high priority, if not the highest priority, when considering which VirtReg to allocate next. + // + // We have 5 bits to assign AllocationPriorities to RegisterClasses. Generally, it is beneficial to + // assign more constrained RegisterClasses first. As a result, we prioritize register classes with + // more 32 bit tuples (e.g. VReg_512) over registers with fewer tuples (e.g. VGPR_32). + // + // The interesting case is the vector register case on architectures which have ARegs, VRegs, AVRegs. + // In this case, we would like to assign ARegs and VRegs before AVRegs, as AVRegs are less constrained + // and can be assigned to both AGPRs and VGPRs. We use the 5th bit to encode this into the + // RegisterClass AllocationPriority. BaseClassPriority is used to turn the bit on, and BaseClassScaleFactor + // is used for scaling of the bit (i.e. 1 << 4). 
+ field int BaseClassPriority = 1; + field int BaseClassScaleFactor = 16; + } multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1, @@ -575,7 +592,7 @@ let HasVGPR = 1 in { def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, (add (interleave (sequence "VGPR%u_LO16", 0, 255), (sequence "VGPR%u_HI16", 0, 255)))> { - let AllocationPriority = 2; + let AllocationPriority = !add(2, !mul(BaseClassPriority, BaseClassScaleFactor)); let Size = 16; let GeneratePressureSet = 0; @@ -601,7 +618,7 @@ def VGPR_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, // i16/f16 only on VI+ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, (add (sequence "VGPR%u", 0, 255))> { - let AllocationPriority = 0; + let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor)); let Size = 32; let Weight = 1; let BaseClassOrder = 32; @@ -610,7 +627,7 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types // Identical to VGPR_32 except it only contains the low 128 (Lo128) registers. def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, (add (sequence "VGPR%u", 0, 127))> { - let AllocationPriority = 0; + let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor)); let GeneratePressureSet = 0; let Size = 32; let Weight = 1; @@ -668,7 +685,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, // AccVGPR 32-bit registers def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, (add (sequence "AGPR%u", 0, 255))> { - let AllocationPriority = 0; + let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor)); let Size = 32; let Weight = 1; let BaseClassOrder = 32; @@ -940,14 +957,23 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> : // Requires n v_mov_b32 to copy let CopyCost = numRegs; - let AllocationPriority = !sub(numRegs, 1); + + // Since we only have 5 bits for the RegisterClass Allocation Priority, and since we use the + // 5th bit for BaseClassPriority, we need to encode the SizePriority into 4 bits. As a result + // of this encoding, for registers with numRegs 15 or 16, we give SizePriority of 14, and for + // registers with numRegs 17+ we give SizePriority of 15. In practice, there is only one + // RegClass per Vector Register type in each of these groups (i.e. numRegs = 15,16 : {VReg_512}, + // and numRegs = 17+ : {VReg_1024}). Therefore, we have not lost any info by compressing. + defvar SizePriority = !if(!le(numRegs, 14), !sub(numRegs, 1), !if(!le(numRegs, 16), 14, 15)); + + let AllocationPriority = !add(SizePriority, !mul(BaseClassPriority, BaseClassScaleFactor)); let Weight = numRegs; } // Define a register tuple class, along with one requiring an even // aligned base register. multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> { - let HasVGPR = 1 in { + let HasVGPR = 1, BaseClassPriority = 1 in { // Define the regular class. def "" : VRegClassBase<numRegs, regTypes, regList> { let BaseClassOrder = !mul(numRegs, 32); @@ -981,7 +1007,7 @@ defm VReg_1024 : VRegClass<32, Reg1024Types.types, (add VGPR_1024)>; } multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> { - let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in { + let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1, BaseClassPriority = 1 in { // Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList> { let BaseClassOrder = !mul(numRegs, 32); @@ -1066,6 +1092,7 @@ def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_6 def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> { let HasVGPR = 1; let HasAGPR = 1; + let BaseClassPriority = 0; let Size = 32; } } // End GeneratePressureSet = 0 @@ -1074,7 +1101,7 @@ def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_3 // aligned base register. multiclass AVRegClass<int numRegs, list<ValueType> regTypes, dag vregList, dag aregList> { - let HasVGPR = 1, HasAGPR = 1 in { + let HasVGPR = 1, HasAGPR = 1, BaseClassPriority = 0 in { // Define the regular class. def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index c812dc9..95fcd4a 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -1236,6 +1236,12 @@ let isCommutable = 1, isReMaterializable = 1 in { defm V_PK_MIN_NUM_BF16 : VOP3PInst<"v_pk_min_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fminnum_like>; defm V_PK_MAX_NUM_BF16 : VOP3PInst<"v_pk_max_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fmaxnum_like>; defm V_PK_FMA_BF16 : VOP3PInst<"v_pk_fma_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fma>; + + // Scalar pseudo used to emulate AMDGPUClamp. + // Expanded to V_PK_MAX_NUM_BF16 with unused high half. + // FIXME-TRUE16: Pseudo expansion of this won't work with True16. + let True16Predicate = UseFakeTrue16Insts in + defm V_MAX_BF16_PSEUDO : VOP3Inst <"v_max_bf16", VOP_BF16_BF16_BF16>; } } // End isCommutable = 1, isReMaterializable = 1 diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ec6f4e2..ece6c10 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -12327,7 +12327,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { } assert(Section && "must have section to emit alignment"); - if (Section->useCodeAlign()) + if (getContext().getAsmInfo()->useCodeAlign(*Section)) getStreamer().emitCodeAlignment(Align(2), &getSTI()); else getStreamer().emitValueToAlignment(Align(2)); @@ -12525,7 +12525,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { // '.align' is target specifically handled to mean 2**2 byte alignment. 
const MCSection *Section = getStreamer().getCurrentSectionOnly(); assert(Section && "must have section to emit alignment"); - if (Section->useCodeAlign()) + if (getContext().getAsmInfo()->useCodeAlign(*Section)) getStreamer().emitCodeAlignment(Align(4), &getSTI(), 0); else getStreamer().emitValueToAlignment(Align(4), 0, 1, 0); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index f64a223..868556b 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1138,7 +1138,8 @@ void ARMTargetELFStreamer::finish() { MCContext &Ctx = getContext(); auto &Asm = getStreamer().getAssembler(); if (any_of(Asm, [](const MCSection &Sec) { - return cast<MCSectionELF>(Sec).getFlags() & ELF::SHF_ARM_PURECODE; + return static_cast<const MCSectionELF &>(Sec).getFlags() & + ELF::SHF_ARM_PURECODE; })) { auto *Text = static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection()); diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp index ad8aa571..0fb33cd 100644 --- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp @@ -260,7 +260,7 @@ bool AVRAsmPrinter::doFinalization(Module &M) { continue; } - auto *Section = cast<MCSectionELF>(TLOF.SectionForGlobal(&GO, TM)); + auto *Section = static_cast<MCSectionELF *>(TLOF.SectionForGlobal(&GO, TM)); if (Section->getName().starts_with(".data")) NeedsCopyData = true; else if (Section->getName().starts_with(".rodata") && SubTM->hasLPM()) diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp index c2c1bb4..0615ec9 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp @@ -24,8 +24,6 @@ AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) { CalleeSaveStackSlotSize = 2; CommentString = ";"; SeparatorString = "$"; - PrivateGlobalPrefix = ".L"; - PrivateLabelPrefix = ".L"; UsesELFSectionDirectiveForBSS = true; SupportsDebugInformation = true; } diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h index fab2713..1915fa8 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h @@ -14,7 +14,7 @@ #define LLVM_AVR_ASM_INFO_H #include "MCTargetDesc/AVRMCExpr.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoELF.h" #include "llvm/MC/MCExpr.h" namespace llvm { @@ -22,7 +22,7 @@ namespace llvm { class Triple; /// Specifies the format of AVR assembly files. 
-class AVRMCAsmInfo : public MCAsmInfo { +class AVRMCAsmInfo : public MCAsmInfoELF { public: explicit AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options); void printSpecifierExpr(raw_ostream &OS, diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 1e29a0f..a87b9a2 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -1255,10 +1255,8 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) { FuncInfo.Label = FuncLabel; FuncInfo.TypeId = FuncTypeId; if (FuncLabel->isInSection()) { - MCSection &Section = FuncLabel->getSection(); - const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section); - assert(SectionELF && "Null section for Function Label"); - SecNameOff = addString(SectionELF->getName()); + auto &Sec = static_cast<const MCSectionELF &>(FuncLabel->getSection()); + SecNameOff = addString(Sec.getName()); } else { SecNameOff = addString(".text"); } diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 827e928..bb74f6a 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -54,11 +54,8 @@ unsigned BPFELFObjectWriter::getRelocType(const MCFixup &Fixup, const MCSymbol &Sym = *A; if (Sym.isDefined()) { - MCSection &Section = Sym.getSection(); - const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section); - assert(SectionELF && "Null section for reloc symbol"); - - unsigned Flags = SectionELF->getFlags(); + auto &Section = static_cast<const MCSectionELF &>(Sym.getSection()); + unsigned Flags = Section.getFlags(); if (Sym.isTemporary()) { // .BTF.ext generates FK_Data_4 relocations for diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index 7b21684..63d6e6f 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -13,18 +13,19 @@ #ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H #define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoELF.h" #include "llvm/TargetParser/Triple.h" namespace llvm { -class BPFMCAsmInfo : public MCAsmInfo { +class BPFMCAsmInfo : public MCAsmInfoELF { public: explicit BPFMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) { if (TT.getArch() == Triple::bpfeb) IsLittleEndian = false; PrivateGlobalPrefix = ".L"; + PrivateLabelPrefix = "L"; WeakRefDirective = "\t.weak\t"; UsesELFSectionDirectiveForBSS = true; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index e915a3c4..613cfb5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2385,19 +2385,9 @@ SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op, return Res; } -static bool isConstantOrUndef(const SDValue Op) { - if (Op->isUndef()) - return true; - if (isa<ConstantSDNode>(Op)) - return true; - if (isa<ConstantFPSDNode>(Op)) - return true; - return false; -} - -static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { +static bool isConstantBUILD_VECTOR(const BuildVectorSDNode *Op) { for (unsigned i = 0; i < Op->getNumOperands(); ++i) - if (isConstantOrUndef(Op->getOperand(i))) + if (isIntOrFPConstant(Op->getOperand(i))) return true; return false; } @@ -2505,20 +2495,23 @@ SDValue 
LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, if (DAG.isSplatValue(Op, /*AllowUndefs=*/false)) return Op; - if (!isConstantOrUndefBUILD_VECTOR(Node)) { + if (!isConstantBUILD_VECTOR(Node)) { // Use INSERT_VECTOR_ELT operations rather than expand to stores. // The resulting code is the same length as the expansion, but it doesn't // use memory operations. - EVT ResTy = Node->getValueType(0); - assert(ResTy.isVector()); unsigned NumElts = ResTy.getVectorNumElements(); - SDValue Vector = - DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Node->getOperand(0)); + SDValue Op0 = Node->getOperand(0); + SDValue Vector = DAG.getUNDEF(ResTy); + + if (!Op0.isUndef()) + Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0); for (unsigned i = 1; i < NumElts; ++i) { - Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, - Node->getOperand(i), + SDValue Opi = Node->getOperand(i); + if (Opi.isUndef()) + continue; + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi, DAG.getConstant(i, DL, Subtarget.getGRLenVT())); } return Vector; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp index 8fa72bc..d9ea88c 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp @@ -254,6 +254,7 @@ bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { MCFixup Fixup = MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN); F.setVarFixups({Fixup}); + F.setLinkerRelaxable(); F.getParent()->setLinkerRelaxable(); return true; } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index feb4eb3..d9680c7 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -969,7 +969,7 @@ void MipsTargetELFStreamer::finish() { Align Alignment = Section.getAlign(); S.switchSection(&Section); - if (Section.useCodeAlign()) + if (getContext().getAsmInfo()->useCodeAlign(Section)) S.emitCodeAlignment(Alignment, &STI, Alignment.value()); else S.emitValueToAlignment(Alignment, 0, 1, Alignment.value()); diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 614b321..ce9cd12 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -15,8 +15,6 @@ using namespace llvm; -void NVPTXMCAsmInfo::anchor() {} - NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple, const MCTargetOptions &Options) { if (TheTriple.getArch() == Triple::nvptx64) { diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h index 77c4dae..f071406 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -19,8 +19,6 @@ namespace llvm { class Triple; class NVPTXMCAsmInfo : public MCAsmInfo { - virtual void anchor(); - public: explicit NVPTXMCAsmInfo(const Triple &TheTriple, const MCTargetOptions &Options); diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp index 9f91143..329e3b5 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp @@ -97,10 +97,7 @@ void 
NVPTXTargetStreamer::changeSection(const MCSection *CurSection, if (isDwarfSection(FI, Section)) { // Emit DWARF .file directives in the outermost scope. outputDwarfFileDirectives(); - OS << "\t.section"; - Section->printSwitchToSection(*getStreamer().getContext().getAsmInfo(), - getStreamer().getContext().getTargetTriple(), - OS, SubSection); + OS << "\t.section\t" << Section->getName() << '\n'; // DWARF sections are enclosed into braces - emit the open one. OS << "\t{\n"; HasSections = true; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 8baf866..1af2f9c 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -220,8 +220,6 @@ bool PPCELFMCAsmInfo::evaluateAsRelocatableImpl(const MCSpecifierExpr &Expr, return evaluateAsRelocatable(Expr, Res, Asm); } -void PPCXCOFFMCAsmInfo::anchor() {} - PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle) report_fatal_error("XCOFF is not supported for little-endian targets"); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 0f945b3..6af1bd7 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -33,8 +33,6 @@ public: }; class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF { - void anchor() override; - public: explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &); void printSpecifierExpr(raw_ostream &OS, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 54497d9..3dad0e8 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -213,7 +213,7 @@ public: void emitTCEntry(const MCSymbol &S, PPCMCExpr::Specifier Kind) override { if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) { MCSymbolXCOFF *TCSym = - cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly()) + static_cast<const MCSectionXCOFF *>(Streamer.getCurrentSectionOnly()) ->getQualNameSymbol(); // On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending // on the TLS access method (or model). For the general-dynamic access diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index a091b21..ce1d51a 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2274,9 +2274,9 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV, void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) { // Setup CurrentFnDescSym and its containing csect. - MCSectionXCOFF *FnDescSec = - cast<MCSectionXCOFF>(getObjFileLowering().getSectionForFunctionDescriptor( - &MF.getFunction(), TM)); + auto *FnDescSec = static_cast<MCSectionXCOFF *>( + getObjFileLowering().getSectionForFunctionDescriptor(&MF.getFunction(), + TM)); FnDescSec->setAlignment(Align(Subtarget->isPPC64() ? 
8 : 4)); CurrentFnDescSym = FnDescSec->getQualNameSymbol(); @@ -2669,9 +2669,9 @@ void PPCAIXAsmPrinter::emitTracebackTable() { MCSymbol *EHInfoSym = TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF); MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym, TOCType_EHBlock); - const MCSymbol *TOCBaseSym = - cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection()) - ->getQualNameSymbol(); + const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>( + getObjFileLowering().getTOCBaseSection()) + ->getQualNameSymbol(); const MCExpr *Exp = MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx), MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx); @@ -2788,7 +2788,7 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) { } } - MCSectionXCOFF *Csect = cast<MCSectionXCOFF>( + auto *Csect = static_cast<MCSectionXCOFF *>( getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); // Switch to the containing csect. @@ -2869,9 +2869,9 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() { OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext), PointerSize); // Emit TOC base address. - const MCSymbol *TOCBaseSym = - cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection()) - ->getQualNameSymbol(); + const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>( + getObjFileLowering().getTOCBaseSection()) + ->getQualNameSymbol(); OutStreamer->emitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext), PointerSize); // Emit a null environment pointer. @@ -2996,10 +2996,10 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { Name += Prefix; Name += cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(); MCSymbol *S = OutContext.getOrCreateSymbol(Name); - TCEntry = cast<MCSectionXCOFF>( + TCEntry = static_cast<MCSectionXCOFF *>( getObjFileLowering().getSectionForTOCEntry(S, TM)); } else { - TCEntry = cast<MCSectionXCOFF>( + TCEntry = static_cast<MCSectionXCOFF *>( getObjFileLowering().getSectionForTOCEntry(I.first.first, TM)); } OutStreamer->switchSection(TCEntry); @@ -3054,7 +3054,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) { return; SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM); - MCSectionXCOFF *Csect = cast<MCSectionXCOFF>( + auto *Csect = static_cast<MCSectionXCOFF *>( getObjFileLowering().SectionForGlobal(GO, GOKind, TM)); Align GOAlign = getGVAlignment(GO, GO->getDataLayout()); @@ -3316,9 +3316,9 @@ void PPCAIXAsmPrinter::emitTTypeReference(const GlobalValue *GV, GlobalType = TOCType_GlobalExternal; MCSymbol *TypeInfoSym = TM.getSymbol(GV); MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym, GlobalType); - const MCSymbol *TOCBaseSym = - cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection()) - ->getQualNameSymbol(); + const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>( + getObjFileLowering().getTOCBaseSection()) + ->getQualNameSymbol(); auto &Ctx = OutStreamer->getContext(); const MCExpr *Exp = MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx), diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index a143d85..d71c42c 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -3849,9 +3849,14 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, switch (Inst.getOpcode()) { default: break; - case RISCV::PseudoC_ADDI_NOP: - emitToStreamer(Out, MCInstBuilder(RISCV::C_NOP)); + case RISCV::PseudoC_ADDI_NOP: { + if 
(Inst.getOperand(2).getImm() == 0) + emitToStreamer(Out, MCInstBuilder(RISCV::C_NOP)); + else + emitToStreamer( + Out, MCInstBuilder(RISCV::C_NOP_HINT).addOperand(Inst.getOperand(2))); return false; + } case RISCV::PseudoLLAImm: case RISCV::PseudoLAImm: case RISCV::PseudoLI: { diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index fa7bcfa..5e54b82 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -193,21 +193,19 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, uint32_t RegNo, static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { - if (RegNo == 0) { + if (RegNo == 0) return MCDisassembler::Fail; - } return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus -DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint32_t Address, - const MCDisassembler *Decoder) { - if (RegNo == 2) { +static DecodeStatus DecodeGPRNoX2RegisterClass(MCInst &Inst, uint64_t RegNo, + uint32_t Address, + const MCDisassembler *Decoder) { + if (RegNo == 2) return MCDisassembler::Fail; - } - return DecodeGPRNoX0RegisterClass(Inst, RegNo, Address, Decoder); + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } static DecodeStatus DecodeGPRNoX31RegisterClass(MCInst &Inst, uint32_t RegNo, @@ -536,31 +534,6 @@ static DecodeStatus decodeRTZArg(MCInst &Inst, uint32_t Imm, int64_t Address, return MCDisassembler::Success; } -static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeRVCInstrRdSImm6(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeRVCInstrRdCLUIImm(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus -decodeRVCInstrRdRs1UImmLog2XLenNonZero(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder); @@ -579,18 +552,6 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, #include "RISCVGenDisassemblerTables.inc" -static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - DecodeStatus S = MCDisassembler::Success; - uint32_t Rd = fieldFromInstruction(Insn, 7, 5); - if (!Check(S, DecodeGPRNoX0RegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - Inst.addOperand(Inst.getOperand(0)); - Inst.addOperand(MCOperand::createImm(0)); - return S; -} - static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder) { @@ -601,66 +562,6 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, return MCDisassembler::Success; } -static DecodeStatus decodeRVCInstrRdSImm6(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - Inst.addOperand(MCOperand::createReg(RISCV::X0)); - uint32_t Imm = - fieldFromInstruction(Insn, 12, 
1) << 5 | fieldFromInstruction(Insn, 2, 5); - [[maybe_unused]] DecodeStatus Result = - decodeSImmOperand<6>(Inst, Imm, Address, Decoder); - assert(Result == MCDisassembler::Success && "Invalid immediate"); - return MCDisassembler::Success; -} - -static DecodeStatus decodeRVCInstrRdCLUIImm(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - Inst.addOperand(MCOperand::createReg(RISCV::X0)); - uint32_t Imm = - fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); - return decodeCLUIImmOperand(Inst, Imm, Address, Decoder); -} - -static DecodeStatus -decodeRVCInstrRdRs1UImmLog2XLenNonZero(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - Inst.addOperand(MCOperand::createReg(RISCV::X0)); - Inst.addOperand(Inst.getOperand(0)); - - uint32_t UImm6 = - fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); - return decodeUImmLog2XLenNonZeroOperand(Inst, UImm6, Address, Decoder); -} - -static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - DecodeStatus S = MCDisassembler::Success; - uint32_t Rd = fieldFromInstruction(Insn, 7, 5); - uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder))) - return MCDisassembler::Fail; - return S; -} - -static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - DecodeStatus S = MCDisassembler::Success; - uint32_t Rd = fieldFromInstruction(Insn, 7, 5); - uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - Inst.addOperand(Inst.getOperand(0)); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder))) - return MCDisassembler::Fail; - return S; -} - static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 2c37c3b..82e3b5c 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -320,6 +320,7 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { MCFixup Fixup = MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN); F.setVarFixups({Fixup}); + F.setLinkerRelaxable(); F.getParent()->setLinkerRelaxable(); return true; } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 7ad5d5f..bddea43 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -330,7 +330,6 @@ enum OperandType : unsigned { OPERAND_UIMM32, OPERAND_UIMM48, OPERAND_UIMM64, - OPERAND_ZERO, OPERAND_THREE, OPERAND_FOUR, OPERAND_SIMM5, diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index d4f5d8f..2f32e2a 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -293,7 +293,7 @@ void RISCVAsmPrinter::emitNTLHint(const MachineInstr *MI) { MCInst Hint; if (STI->hasStdExtZca()) - Hint.setOpcode(RISCV::C_ADD_HINT); + Hint.setOpcode(RISCV::C_ADD); else 
Hint.setOpcode(RISCV::ADD); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index b1ab76a..9fc0d81 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1581,7 +1581,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, // Set the register and all its subregisters. if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) { SavedRegs.set(CSReg); - llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); }); + for (unsigned Reg : SubRegs) + SavedRegs.set(Reg); } // Combine to super register if all of its subregisters are marked. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 64f9e3e..085064e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2859,9 +2859,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_UIMM16_NONZERO: Ok = isUInt<16>(Imm) && (Imm != 0); break; - case RISCVOp::OPERAND_ZERO: - Ok = Imm == 0; - break; case RISCVOp::OPERAND_THREE: Ok = Imm == 3; break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index 8252a9b..c5551fb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -57,12 +57,6 @@ def simm6nonzero : RISCVOp, }]; } -def immzero : RISCVOp, - ImmLeaf<XLenVT, [{return (Imm == 0);}]> { - let ParserMatchClass = ImmZeroAsmOperand; - let OperandType = "OPERAND_ZERO"; -} - def CLUIImmAsmOperand : AsmOperandClass { let Name = "CLUIImm"; let RenderMethod = "addImmOperands"; @@ -272,7 +266,7 @@ class Bcz<bits<3> funct3, string OpcodeStr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class Shift_right<bits<2> funct2, string OpcodeStr> : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), - (ins GPRC:$rs1, uimmlog2xlennonzero:$imm), + (ins GPRC:$rs1, uimmlog2xlen:$imm), OpcodeStr, "$rs1, $imm"> { let Constraints = "$rs1 = $rd"; let Inst{12} = imm{5}; @@ -402,17 +396,19 @@ def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">, let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), - (ins GPRNoX0:$rd, simm6nonzero:$imm), + (ins GPRNoX0:$rd, simm6:$imm), "c.addi", "$rd, $imm">, Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; } -// Alternate syntax for c.nop. Converted to C_NOP by the assembler. +// Alternate syntax for c.nop. Converted to C_NOP/C_NOP_HINT by the assembler. 
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, isAsmParserOnly = 1 in -def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, immzero:$imm), - [], "c.addi", "$rd, $imm">; +def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, simm6:$imm), + [], "c.addi", "$rd, $imm"> { + let Constraints = "$rs1 = $rd"; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, DecoderNamespace = "RV32Only", Defs = [X1], @@ -430,7 +426,7 @@ def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb), } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm), +def C_LI : RVInst16CI<0b010, 0b01, (outs GPR:$rd), (ins simm6:$imm), "c.li", "$rd, $imm">, Sched<[WriteIALU]>; @@ -449,7 +445,7 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb), } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd), +def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX2:$rd), (ins c_lui_imm:$imm), "c.lui", "$rd, $imm">, Sched<[WriteIALU]>; @@ -497,8 +493,8 @@ def C_BEQZ : Bcz<0b110, "c.beqz">, Sched<[WriteJmp, ReadJmp]>; def C_BNEZ : Bcz<0b111, "c.bnez">, Sched<[WriteJmp, ReadJmp]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb), - (ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm), +def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), + (ins GPR:$rd, uimmlog2xlen:$imm), "c.slli", "$rd, $imm">, Sched<[WriteShiftImm, ReadShiftImm]> { let Constraints = "$rd = $rd_wb"; @@ -544,7 +540,7 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isMoveReg = 1, isAsCheapAsAMove = 1 in -def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2), +def C_MV : RVInst16CR<0b1000, 0b10, (outs GPR:$rs1), (ins GPRNoX0:$rs2), "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]>; @@ -557,8 +553,8 @@ def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1), "c.jalr", "$rs1">, Sched<[WriteJalr, ReadJalr]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rd), - (ins GPRNoX0:$rs1, GPRNoX0:$rs2), +def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPR:$rd), + (ins GPR:$rs1, GPRNoX0:$rs2), "c.add", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU, ReadIALU]> { let Constraints = "$rs1 = $rd"; @@ -616,81 +612,6 @@ def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), let rd = 0; } -def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), - (ins GPRNoX0:$rd, immzero:$imm), - "c.addi", "$rd, $imm">, - Sched<[WriteIALU, ReadIALU]> { - let Constraints = "$rd = $rd_wb"; - let imm = 0; - let DecoderMethod = "decodeRVCInstrRdRs1ImmZero"; -} - -def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm), - "c.li", "$rd, $imm">, - Sched<[WriteIALU]> { - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdSImm6"; -} - -def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd), - (ins c_lui_imm:$imm), - "c.lui", "$rd, $imm">, - Sched<[WriteIALU]> { - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdCLUIImm"; -} - -def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2), - "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]> { - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdRs2"; -} - -def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rd), - (ins GPRX0:$rs1, GPRNoX0:$rs2), - "c.add", "$rs1, 
$rs2">, - Sched<[WriteIALU, ReadIALU, ReadIALU]> { - let Constraints = "$rs1 = $rd"; - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdRs1Rs2"; -} - -def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb), - (ins GPRX0:$rd, uimmlog2xlennonzero:$imm), - "c.slli", "$rd, $imm">, - Sched<[WriteShiftImm, ReadShiftImm]> { - let Constraints = "$rd = $rd_wb"; - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdRs1UImmLog2XLenNonZero"; -} - -def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd), - "c.slli64", "$rd">, - Sched<[WriteShiftImm, ReadShiftImm]> { - let Constraints = "$rd = $rd_wb"; - let imm = 0; -} - -def C_SRLI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), - (ins GPRC:$rs1), - "c.srli64", "$rs1">, - Sched<[WriteShiftImm, ReadShiftImm]> { - let Constraints = "$rs1 = $rd"; - let Inst{6-2} = 0; - let Inst{11-10} = 0b00; - let Inst{12} = 0; -} - -def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), - (ins GPRC:$rs1), - "c.srai64", "$rs1">, - Sched<[WriteShiftImm, ReadShiftImm]> { - let Constraints = "$rs1 = $rd"; - let Inst{6-2} = 0; - let Inst{11-10} = 0b01; - let Inst{12} = 0; -} - } // Predicates = [HasStdExtZca], hasSideEffects = 0, mayLoad = 0, // mayStore = 0 @@ -699,15 +620,17 @@ def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), //===----------------------------------------------------------------------===// let Predicates = [HasStdExtZca] in { -// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6. -def : InstAlias<"c.addi x0, $imm", (C_NOP_HINT simm6nonzero:$imm), 0>; +// Legacy aliases. +def : InstAlias<"c.slli64 $rd", (C_SLLI GPR:$rd, 0), 0>; +def : InstAlias<"c.srli64 $rs1", (C_SRLI GPRC:$rs1, 0), 0>; +def : InstAlias<"c.srai64 $rs1", (C_SRAI GPRC:$rs1, 0), 0>; } let Predicates = [HasStdExtC, HasStdExtZihintntl] in { -def : InstAlias<"c.ntl.p1", (C_ADD_HINT X0, X2)>; -def : InstAlias<"c.ntl.pall", (C_ADD_HINT X0, X3)>; -def : InstAlias<"c.ntl.s1", (C_ADD_HINT X0, X4)>; -def : InstAlias<"c.ntl.all", (C_ADD_HINT X0, X5)>; +def : InstAlias<"c.ntl.p1", (C_ADD X0, X2)>; +def : InstAlias<"c.ntl.pall", (C_ADD X0, X3)>; +def : InstAlias<"c.ntl.s1", (C_ADD X0, X4)>; +def : InstAlias<"c.ntl.all", (C_ADD X0, X5)>; } // Predicates = [HasStdExtC, HasStdExtZihintntl] let EmitPriority = 0 in { diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index e87f452..ccb39e8 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -268,6 +268,11 @@ def GPRNoX0 : GPRRegisterClass<(sub GPR, X0)> { let DiagnosticString = "register must be a GPR excluding zero (x0)"; } +def GPRNoX2 : GPRRegisterClass<(sub GPR, X2)> { + let DiagnosticType = "InvalidRegClassGPRNoX2"; + let DiagnosticString = "register must be a GPR excluding sp (x2)"; +} + def GPRNoX0X2 : GPRRegisterClass<(sub GPR, X0, X2)> { let DiagnosticType = "InvalidRegClassGPRNoX0X2"; let DiagnosticString = "register must be a GPR excluding zero (x0) and sp (x2)"; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 1624f12a..fd634b5 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -979,12 +979,10 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost( Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const { - // The interleaved memory access pass 
will lower (de)interleave ops combined - // with an adjacent appropriate memory to vlseg/vsseg intrinsics. vlseg/vsseg - // only support masking per-iteration (i.e. condition), not per-segment (i.e. - // gap). - // TODO: Support masked interleaved access for fixed length vector. - if ((isa<ScalableVectorType>(VecTy) || !UseMaskForCond) && !UseMaskForGaps && + // The interleaved memory access pass will lower interleaved memory ops (i.e + // a load and store followed by a specific shuffle) to vlseg/vsseg + // intrinsics. + if (!UseMaskForCond && !UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) { auto *VTy = cast<VectorType>(VecTy); std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 05d504c..d62d99c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -398,10 +398,6 @@ public: bool enableInterleavedAccessVectorization() const override { return true; } - bool enableMaskedInterleavedAccessVectorization() const override { - return ST->hasVInstructions(); - } - unsigned getMinTripCountTailFoldingThreshold() const override; enum RISCVRegisterClass { GPRRC, FPRRC, VRRC }; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp index 3ef6030..72bb372 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp @@ -69,8 +69,8 @@ void SystemZHLASMAsmStreamer::EmitEOL() { void SystemZHLASMAsmStreamer::changeSection(MCSection *Section, uint32_t Subsection) { - Section->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS, - Subsection); + MAI->printSwitchToSection(*Section, Subsection, + getContext().getTargetTriple(), OS); MCStreamer::changeSection(Section, Subsection); } diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 19c9e9c..6ae69a4 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -900,7 +900,8 @@ public: bool checkDataSection() { if (CurrentState != DataSection) { - auto *WS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly()); + auto *WS = static_cast<const MCSectionWasm *>( + getStreamer().getCurrentSectionOnly()); if (WS && WS->isText()) return error("data directive must occur in a data segment: ", Lexer.getTok()); @@ -1218,7 +1219,8 @@ public: void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) override { // Code below only applies to labels in text sections. 
- auto *CWS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly()); + auto *CWS = static_cast<const MCSectionWasm *>( + getStreamer().getCurrentSectionOnly()); if (!CWS->isText()) return; diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index 13603f8..a606209 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -71,6 +71,7 @@ def FeatureReferenceTypes : SubtargetFeature<"reference-types", "HasReferenceTypes", "true", "Enable reference types">; +def FeatureGC : SubtargetFeature<"gc", "HasGC", "true", "Enable wasm gc">; def FeatureRelaxedSIMD : SubtargetFeature<"relaxed-simd", "SIMDLevel", "RelaxedSIMD", "Enable relaxed-simd instructions">; @@ -136,13 +137,13 @@ def : ProcessorModel<"lime1", NoSchedModel, // Latest and greatest experimental version of WebAssembly. Bugs included! def : ProcessorModel<"bleeding-edge", NoSchedModel, - [FeatureAtomics, FeatureBulkMemory, FeatureBulkMemoryOpt, - FeatureCallIndirectOverlong, FeatureExceptionHandling, - FeatureExtendedConst, FeatureFP16, FeatureMultiMemory, - FeatureMultivalue, FeatureMutableGlobals, - FeatureNontrappingFPToInt, FeatureRelaxedSIMD, - FeatureReferenceTypes, FeatureSIMD128, FeatureSignExt, - FeatureTailCall]>; + [FeatureAtomics, FeatureBulkMemory, FeatureBulkMemoryOpt, + FeatureCallIndirectOverlong, FeatureExceptionHandling, + FeatureExtendedConst, FeatureFP16, FeatureMultiMemory, + FeatureMultivalue, FeatureMutableGlobals, + FeatureNontrappingFPToInt, FeatureRelaxedSIMD, + FeatureReferenceTypes, FeatureGC, FeatureSIMD128, + FeatureSignExt, FeatureTailCall]>; //===----------------------------------------------------------------------===// // Target Declaration diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 11936a3..cd434f7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -288,7 +288,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // Expand float operations supported for scalars but not SIMD for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, - ISD::FEXP, ISD::FEXP2}) + ISD::FEXP, ISD::FEXP2, ISD::FEXP10}) for (auto T : {MVT::v4f32, MVT::v2f64}) setOperationAction(Op, T, Expand); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index b5e723e..2b632fd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -76,6 +76,9 @@ def HasReferenceTypes : Predicate<"Subtarget->hasReferenceTypes()">, AssemblerPredicate<(all_of FeatureReferenceTypes), "reference-types">; +def HasGC : Predicate<"Subtarget->hasGC()">, + AssemblerPredicate<(all_of FeatureGC), "gc">; + def HasRelaxedSIMD : Predicate<"Subtarget->hasRelaxedSIMD()">, AssemblerPredicate<(all_of FeatureRelaxedSIMD), "relaxed-simd">; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td index 40b87a0..fc82e5b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td @@ -36,13 +36,10 @@ multiclass REF_I<WebAssemblyRegClass rc, ValueType vt, string ht> { Requires<[HasReferenceTypes]>; } -defm REF_TEST_FUNCREF : - I<(outs I32: $res), - (ins TypeIndex:$type, FUNCREF: $ref), - (outs), - (ins TypeIndex:$type), - [], - 
"ref.test\t$type, $ref", "ref.test $type", 0xfb14>; +defm REF_TEST_FUNCREF : I<(outs I32:$res), (ins TypeIndex:$type, FUNCREF:$ref), + (outs), (ins TypeIndex:$type), [], + "ref.test\t$type, $ref", "ref.test $type", 0xfb14>, + Requires<[HasGC]>; defm "" : REF_I<FUNCREF, funcref, "func">; defm "" : REF_I<EXTERNREF, externref, "extern">; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 40ea48a..a3ce40f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -43,6 +43,11 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU, Bits.set(WebAssembly::FeatureBulkMemoryOpt); } + // gc implies reference-types + if (HasGC) { + HasReferenceTypes = true; + } + // reference-types implies call-indirect-overlong if (HasReferenceTypes) { HasCallIndirectOverlong = true; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index 591ce256..f814274 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -51,6 +51,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasMutableGlobals = false; bool HasNontrappingFPToInt = false; bool HasReferenceTypes = false; + bool HasGC = false; bool HasSignExt = false; bool HasTailCall = false; bool HasWideArithmetic = false; @@ -107,6 +108,7 @@ public: bool hasMutableGlobals() const { return HasMutableGlobals; } bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; } bool hasReferenceTypes() const { return HasReferenceTypes; } + bool hasGC() const { return HasGC; } bool hasRelaxedSIMD() const { return SIMDLevel >= RelaxedSIMD; } bool hasSignExt() const { return HasSignExt; } bool hasSIMD128() const { return SIMDLevel >= SIMD128; } diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 8213e51..d7671ed 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -4803,7 +4803,7 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) { getStreamer().initSections(false, getSTI()); Section = getStreamer().getCurrentSectionOnly(); } - if (Section->useCodeAlign()) + if (getContext().getAsmInfo()->useCodeAlign(*Section)) getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0); else getStreamer().emitValueToAlignment(Align(2), 0, 1, 0); diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def index 620526ff..3f2a433 100644 --- a/llvm/lib/Target/X86/X86PassRegistry.def +++ b/llvm/lib/Target/X86/X86PassRegistry.def @@ -12,8 +12,52 @@ // NOTE: NO INCLUDE GUARD DESIRED! 
+#ifndef DUMMY_FUNCTION_PASS +#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS) +#endif +DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this)) +DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this)) +DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction()) +DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass()) +#undef DUMMY_FUNCTION_PASS + #ifndef MACHINE_FUNCTION_PASS #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) #endif MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this)) #undef MACHINE_FUNCTION_PASS + +#ifndef DUMMY_MACHINE_FUNCTION_PASS +#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME) +#endif +DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-SFB", X86AvoidSFBPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-trailing-call", X86AvoidTrailingCallPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-cf-opt", X86CallFrameOptimization()) +DUMMY_MACHINE_FUNCTION_PASS("x86-cmov-conversion", X86CmovConverterPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-codege", FPS()) +DUMMY_MACHINE_FUNCTION_PASS("x86-compress-evex", CompressEVEXPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-domain-reassignment", X86DomainReassignment()) +DUMMY_MACHINE_FUNCTION_PASS("x86-dyn-alloca-expander", X86DynAllocaExpander()) +DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix()) +DUMMY_MACHINE_FUNCTION_PASS("fastpretileconfig", X86FastPreTileConfig()) +DUMMY_MACHINE_FUNCTION_PASS("fasttileconfig", X86FastTileConfig()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-LEAs", FixupLEAPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-bw-inst", FixupBWInstPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-inst-tuning", X86FixupInstTuningPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-setcc", X86FixupSetCCPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-vector-constants", X86FixupVectorConstantsPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-flags-copy-lowering", X86FlagsCopyLoweringPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-lower-tile-copy", X86LowerTileCopy()) +DUMMY_MACHINE_FUNCTION_PASS("x86-lvi-load", X86LoadValueInjectionLoadHardeningPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-lvi-ret", X86LoadValueInjectionRetHardeningPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-optimize-LEAs", X86OptimizeLEAPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-pseudo", X86ExpandPseudo()) +DUMMY_MACHINE_FUNCTION_PASS("x86-return-thunks", X86ReturnThunks()) +DUMMY_MACHINE_FUNCTION_PASS("x86-seses", X86SpeculativeExecutionSideEffectSuppression()) +DUMMY_MACHINE_FUNCTION_PASS("x86-slh", X86SpeculativeLoadHardeningPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-suppress-apx-for-relocation", X86SuppressAPXForRelocationPass()) +DUMMY_MACHINE_FUNCTION_PASS("tile-pre-config", X86PreTileConfig()) +DUMMY_MACHINE_FUNCTION_PASS("tileconfig", X86TileConfig()) +DUMMY_MACHINE_FUNCTION_PASS("x86-wineh-unwindv2", X86WinEHUnwindV2()) +DUMMY_MACHINE_FUNCTION_PASS("x86argumentstackrebase", X86ArgumentStackSlotPass()) +#undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 37a7b37..90791fc 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1838,14 +1838,15 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry AVX512BWShuffleTbl[] = { - { TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw - { TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw - { TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } 
}, // vpbroadcastb + { TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb - { TTI::SK_Reverse, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw - { TTI::SK_Reverse, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw + { TTI::SK_Reverse, MVT::v32i16, { 2, 6, 2, 4 } }, // vpermw + { TTI::SK_Reverse, MVT::v32f16, { 2, 6, 2, 4 } }, // vpermw { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw - { TTI::SK_Reverse, MVT::v64i8, { 2, 2, 2, 2 } }, // pshufb + vshufi64x2 + { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw + { TTI::SK_Reverse, MVT::v64i8, { 2, 9, 2, 3 } }, // pshufb + vshufi64x2 { TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw { TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw @@ -1874,18 +1875,25 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry AVX512ShuffleTbl[] = { - {TTI::SK_Broadcast, MVT::v8f64, { 1, 1, 1, 1 } }, // vbroadcastsd - {TTI::SK_Broadcast, MVT::v16f32, { 1, 1, 1, 1 } }, // vbroadcastss - {TTI::SK_Broadcast, MVT::v8i64, { 1, 1, 1, 1 } }, // vpbroadcastq - {TTI::SK_Broadcast, MVT::v16i32, { 1, 1, 1, 1 } }, // vpbroadcastd - {TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw - {TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw - {TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb - - {TTI::SK_Reverse, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermpd - {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps - {TTI::SK_Reverse, MVT::v8i64, { 1, 3, 1, 1 } }, // vpermq - {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd + {TTI::SK_Broadcast, MVT::v8f64, { 1, 3, 1, 1 } }, // vbroadcastsd + {TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 1 } }, // vbroadcastsd + {TTI::SK_Broadcast, MVT::v16f32, { 1, 3, 1, 1 } }, // vbroadcastss + {TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 1 } }, // vbroadcastss + {TTI::SK_Broadcast, MVT::v8i64, { 1, 3, 1, 1 } }, // vpbroadcastq + {TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 1 } }, // vpbroadcastq + {TTI::SK_Broadcast, MVT::v16i32, { 1, 3, 1, 1 } }, // vpbroadcastd + {TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 1 } }, // vpbroadcastd + {TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 1 } }, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 1 } }, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb + {TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 1 }}, // vpbroadcastb + + {TTI::SK_Reverse, MVT::v8f64, { 1, 5, 2, 3 } }, // vpermpd + {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 2, 3 } }, // vpermps + {TTI::SK_Reverse, MVT::v8i64, { 1, 5, 2, 3 } }, // vpermq + {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 2, 3 } }, // vpermd {TTI::SK_Reverse, MVT::v32i16, { 7, 7, 7, 7 } }, // per mca {TTI::SK_Reverse, MVT::v32f16, { 7, 7, 7, 7 } }, // per mca {TTI::SK_Reverse, MVT::v64i8, { 7, 7, 7, 7 } }, // per mca @@ -1973,21 +1981,24 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry AVX2ShuffleTbl[] = { - { TTI::SK_Broadcast, MVT::v4f64, { 1, 1, 1, 1 } }, // vbroadcastpd - { TTI::SK_Broadcast, MVT::v8f32, { 1, 1, 1, 1 } }, // vbroadcastps - { TTI::SK_Broadcast, MVT::v4i64, { 1, 1, 1, 1 } }, // vpbroadcastq - { 
TTI::SK_Broadcast, MVT::v8i32, { 1, 1, 1, 1 } }, // vpbroadcastd - { TTI::SK_Broadcast, MVT::v16i16, { 1, 1, 1, 1 } }, // vpbroadcastw - { TTI::SK_Broadcast, MVT::v16f16, { 1, 1, 1, 1 } }, // vpbroadcastw - { TTI::SK_Broadcast, MVT::v32i8, { 1, 1, 1, 1 } }, // vpbroadcastb - - { TTI::SK_Reverse, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd - { TTI::SK_Reverse, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps - { TTI::SK_Reverse, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq - { TTI::SK_Reverse, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd - { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb - { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb - { TTI::SK_Reverse, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb + { TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 2 } }, // vbroadcastpd + { TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 2 } }, // vbroadcastps + { TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 2 } }, // vpbroadcastq + { TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 2 } }, // vpbroadcastd + { TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 2 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v8i16, { 1, 3, 1, 1 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 2 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v8f16, { 1, 3, 1, 1 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 2 } }, // vpbroadcastb + { TTI::SK_Broadcast, MVT::v16i8, { 1, 3, 1, 1 } }, // vpbroadcastb + + { TTI::SK_Reverse, MVT::v4f64, { 1, 6, 1, 2 } }, // vpermpd + { TTI::SK_Reverse, MVT::v8f32, { 2, 7, 2, 4 } }, // vpermps + { TTI::SK_Reverse, MVT::v4i64, { 1, 6, 1, 2 } }, // vpermq + { TTI::SK_Reverse, MVT::v8i32, { 2, 7, 2, 4 } }, // vpermd + { TTI::SK_Reverse, MVT::v16i16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v16f16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v32i8, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb { TTI::SK_Select, MVT::v16i16, { 1, 1, 1, 1 } }, // vpblendvb { TTI::SK_Select, MVT::v16f16, { 1, 1, 1, 1 } }, // vpblendvb @@ -2077,23 +2088,23 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry AVX1ShuffleTbl[] = { - {TTI::SK_Broadcast, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd - {TTI::SK_Broadcast, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps - {TTI::SK_Broadcast, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd - {TTI::SK_Broadcast, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps - {TTI::SK_Broadcast, MVT::v16i16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128 - {TTI::SK_Broadcast, MVT::v16f16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128 - {TTI::SK_Broadcast, MVT::v32i8, {2,2,2,2}}, // vpshufb + vinsertf128 - - {TTI::SK_Reverse, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd - {TTI::SK_Reverse, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps - {TTI::SK_Reverse, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd - {TTI::SK_Reverse, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps - {TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb + {TTI::SK_Broadcast, MVT::v4f64, {2,3,2,3}}, // vperm2f128 + vpermilpd + {TTI::SK_Broadcast, MVT::v8f32, {2,3,2,3}}, // vperm2f128 + vpermilps + {TTI::SK_Broadcast, MVT::v4i64, {2,3,2,3}}, // vperm2f128 + vpermilpd + {TTI::SK_Broadcast, MVT::v8i32, {2,3,2,3}}, // vperm2f128 + vpermilps + {TTI::SK_Broadcast, MVT::v16i16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128 + {TTI::SK_Broadcast, MVT::v16f16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128 + 
{TTI::SK_Broadcast, MVT::v32i8, {3,4,3,6}}, // vpshufb + vinsertf128 + + {TTI::SK_Reverse, MVT::v4f64, {2,6,2,2}}, // vperm2f128 + vpermilpd + {TTI::SK_Reverse, MVT::v8f32, {2,7,2,4}}, // vperm2f128 + vpermilps + {TTI::SK_Reverse, MVT::v4i64, {2,6,2,2}}, // vperm2f128 + vpermilpd + {TTI::SK_Reverse, MVT::v8i32, {2,7,2,4}}, // vperm2f128 + vpermilps + {TTI::SK_Reverse, MVT::v16i16, {2,9,5,5}}, // vextractf128 + 2*pshufb // + vinsertf128 - {TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb + {TTI::SK_Reverse, MVT::v16f16, {2,9,5,5}}, // vextractf128 + 2*pshufb // + vinsertf128 - {TTI::SK_Reverse, MVT::v32i8, {4,4,4,4}}, // vextractf128 + 2*pshufb + {TTI::SK_Reverse, MVT::v32i8, {2,9,5,5}}, // vextractf128 + 2*pshufb // + vinsertf128 {TTI::SK_Select, MVT::v4i64, {1,1,1,1}}, // vblendpd @@ -2156,13 +2167,13 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry SSSE3ShuffleTbl[] = { - {TTI::SK_Broadcast, MVT::v8i16, {1, 1, 1, 1}}, // pshufb - {TTI::SK_Broadcast, MVT::v8f16, {1, 1, 1, 1}}, // pshufb - {TTI::SK_Broadcast, MVT::v16i8, {1, 1, 1, 1}}, // pshufb + {TTI::SK_Broadcast, MVT::v8i16, {1, 3, 2, 2}}, // pshufb + {TTI::SK_Broadcast, MVT::v8f16, {1, 3, 2, 2}}, // pshufb + {TTI::SK_Broadcast, MVT::v16i8, {1, 3, 2, 2}}, // pshufb - {TTI::SK_Reverse, MVT::v8i16, {1, 1, 1, 1}}, // pshufb - {TTI::SK_Reverse, MVT::v8f16, {1, 1, 1, 1}}, // pshufb - {TTI::SK_Reverse, MVT::v16i8, {1, 1, 1, 1}}, // pshufb + {TTI::SK_Reverse, MVT::v8i16, {1, 2, 1, 2}}, // pshufb + {TTI::SK_Reverse, MVT::v8f16, {1, 2, 1, 2}}, // pshufb + {TTI::SK_Reverse, MVT::v16i8, {1, 2, 1, 2}}, // pshufb {TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por {TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por @@ -2192,16 +2203,16 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v2f64, {1, 1, 1, 1}}, // shufpd {TTI::SK_Broadcast, MVT::v2i64, {1, 1, 1, 1}}, // pshufd {TTI::SK_Broadcast, MVT::v4i32, {1, 1, 1, 1}}, // pshufd - {TTI::SK_Broadcast, MVT::v8i16, {2, 2, 2, 2}}, // pshuflw + pshufd - {TTI::SK_Broadcast, MVT::v8f16, {2, 2, 2, 2}}, // pshuflw + pshufd - {TTI::SK_Broadcast, MVT::v16i8, {3, 3, 3, 3}}, // unpck + pshuflw + pshufd + {TTI::SK_Broadcast, MVT::v8i16, {1, 2, 2, 2}}, // pshuflw + pshufd + {TTI::SK_Broadcast, MVT::v8f16, {1, 2, 2, 2}}, // pshuflw + pshufd + {TTI::SK_Broadcast, MVT::v16i8, {2, 3, 3, 4}}, // unpck + pshuflw + pshufd {TTI::SK_Reverse, MVT::v2f64, {1, 1, 1, 1}}, // shufpd {TTI::SK_Reverse, MVT::v2i64, {1, 1, 1, 1}}, // pshufd {TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd - {TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd - {TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd - {TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw + {TTI::SK_Reverse, MVT::v8i16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd + {TTI::SK_Reverse, MVT::v8f16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd + {TTI::SK_Reverse, MVT::v16i8, {5, 6,11,11}}, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + packus {TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index 17c9833..d6afb8a 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -858,16 +858,15 @@ void RISCVISAInfo::updateImplication() { StringRef ExtName = WorkList.pop_back_val(); auto Range = 
std::equal_range(std::begin(ImpliedExts), std::end(ImpliedExts), ExtName); - std::for_each(Range.first, Range.second, - [&](const ImpliedExtsEntry &Implied) { - const char *ImpliedExt = Implied.ImpliedExt; - auto [It, Inserted] = Exts.try_emplace(ImpliedExt); - if (!Inserted) - return; - auto Version = findDefaultVersion(ImpliedExt); - It->second = *Version; - WorkList.push_back(ImpliedExt); - }); + for (const ImpliedExtsEntry &Implied : llvm::make_range(Range)) { + const char *ImpliedExt = Implied.ImpliedExt; + auto [It, Inserted] = Exts.try_emplace(ImpliedExt); + if (!Inserted) + continue; + auto Version = findDefaultVersion(ImpliedExt); + It->second = *Version; + WorkList.push_back(ImpliedExt); + } } // Add Zcd if C and D are enabled. diff --git a/llvm/lib/TextAPI/SymbolSet.cpp b/llvm/lib/TextAPI/SymbolSet.cpp index 2e0b416..f21a061 100644 --- a/llvm/lib/TextAPI/SymbolSet.cpp +++ b/llvm/lib/TextAPI/SymbolSet.cpp @@ -11,6 +11,11 @@ using namespace llvm; using namespace llvm::MachO; +SymbolSet::~SymbolSet() { + for (auto &[Key, Sym] : Symbols) + Sym->~Symbol(); +} + Symbol *SymbolSet::addGlobalImpl(EncodeKind Kind, StringRef Name, SymbolFlags Flags) { Name = copyString(Name); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 0164fcd..81de0a5 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId, "Number of missing alloc nodes for context ids"); STATISTIC(SkippedCallsCloning, "Number of calls skipped during cloning due to unexpected operand"); +STATISTIC(MismatchedCloneAssignments, + "Number of callsites assigned to call multiple non-matching clones"); static cl::opt<std::string> DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -2060,6 +2062,20 @@ static bool isMemProfClone(const Function &F) { return F.getName().contains(MemProfCloneSuffix); } +// Return the clone number of the given function by extracting it from the +// memprof suffix. Assumes the caller has already confirmed it is a memprof +// clone. +static unsigned getMemProfCloneNum(const Function &F) { + assert(isMemProfClone(F)); + auto Pos = F.getName().find_last_of('.'); + assert(Pos > 0); + unsigned CloneNo; + bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo); + assert(!Err); + (void)Err; + return CloneNo; +} + std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, const Instruction *Call, unsigned CloneNo) const { @@ -3979,7 +3995,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const { void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) { - if (CalleeFunc.cloneNo() > 0) + auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction(); + auto NewCalleeCloneNo = CalleeFunc.cloneNo(); + if (isMemProfClone(*CurF)) { + // If we already assigned this callsite to call a specific non-default + // clone (i.e. not the original function which is clone 0), ensure that we + // aren't trying to now update it to call a different clone, which is + // indicative of a bug in the graph or function assignment. 
+ auto CurCalleeCloneNo = getMemProfCloneNum(*CurF); + if (CurCalleeCloneNo != NewCalleeCloneNo) { + LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was " + << CurCalleeCloneNo << " now " << NewCalleeCloneNo + << "\n"); + MismatchedCloneAssignments++; + } + } + if (NewCalleeCloneNo > 0) cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func()); OREGetter(CallerCall.call()->getFunction()) .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call()) @@ -3995,7 +4026,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall, assert(CI && "Caller cannot be an allocation which should not have profiled calls"); assert(CI->Clones.size() > CallerCall.cloneNo()); - CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo(); + auto NewCalleeCloneNo = CalleeFunc.cloneNo(); + auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()]; + // If we already assigned this callsite to call a specific non-default + // clone (i.e. not the original function which is clone 0), ensure that we + // aren't trying to now update it to call a different clone, which is + // indicative of a bug in the graph or function assignment. + if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) { + LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was " + << CurCalleeCloneNo << " now " << NewCalleeCloneNo + << "\n"); + MismatchedCloneAssignments++; + } + CurCalleeCloneNo = NewCalleeCloneNo; } // Update the debug information attached to NewFunc to use the clone Name. Note @@ -4703,6 +4746,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { // where the callers were assigned to different clones of a function. } + auto FindFirstAvailFuncClone = [&]() { + // Find first function in FuncClonesToCallMap without an assigned + // clone of this callsite Node. We should always have one + // available at this point due to the earlier cloning when the + // FuncClonesToCallMap size was smaller than the clone number. + for (auto &CF : FuncClonesToCallMap) { + if (!FuncCloneToCurNodeCloneMap.count(CF.first)) + return CF.first; + } + assert(false && + "Expected an available func clone for this callsite clone"); + }; + // See if we can use existing function clone. Walk through // all caller edges to see if any have already been assigned to // a clone of this callsite's function. If we can use it, do so. If not, @@ -4819,16 +4875,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { // clone of OrigFunc for another caller during this iteration over // its caller edges. if (!FuncCloneAssignedToCurCallsiteClone) { - // Find first function in FuncClonesToCallMap without an assigned - // clone of this callsite Node. We should always have one - // available at this point due to the earlier cloning when the - // FuncClonesToCallMap size was smaller than the clone number. - for (auto &CF : FuncClonesToCallMap) { - if (!FuncCloneToCurNodeCloneMap.count(CF.first)) { - FuncCloneAssignedToCurCallsiteClone = CF.first; - break; - } - } + FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone(); assert(FuncCloneAssignedToCurCallsiteClone); // Assign Clone to FuncCloneAssignedToCurCallsiteClone AssignCallsiteCloneToFuncClone( @@ -4842,6 +4889,31 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { FuncCloneAssignedToCurCallsiteClone); } } + // If we didn't assign a function clone to this callsite clone yet, e.g. + // none of its callers has a non-null call, do the assignment here. 
+ // We want to ensure that every callsite clone is assigned to some + // function clone, so that the call updates below work as expected. + // In particular if this is the original callsite, we want to ensure it + // is assigned to the original function, otherwise the original function + // will appear available for assignment to other callsite clones, + // leading to unintended effects. For one, the unknown and not updated + // callers will call into cloned paths leading to the wrong hints, + // because they still call the original function (clone 0). Also, + // because all callsites start out as being clone 0 by default, we can't + // easily distinguish between callsites explicitly assigned to clone 0 + // vs those never assigned, which can lead to multiple updates of the + // calls when invoking updateCall below, with mismatched clone values. + // TODO: Add a flag to the callsite nodes or some other mechanism to + // better distinguish and identify callsite clones that are not getting + // assigned to function clones as expected. + if (!FuncCloneAssignedToCurCallsiteClone) { + FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone(); + assert(FuncCloneAssignedToCurCallsiteClone && + "No available func clone for this callsite clone"); + AssignCallsiteCloneToFuncClone( + FuncCloneAssignedToCurCallsiteClone, Call, Clone, + /*IsAlloc=*/AllocationCallToContextNodeMap.contains(Call)); + } } if (VerifyCCG) { checkNode<DerivedCCG, FuncTy, CallTy>(Node); diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index fe3315e..4e5a8d1 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -1459,8 +1459,6 @@ void HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo, size_t Size = memtag::getAllocaSizeInBytes(*AI); size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); - Value *AICast = IRB.CreatePointerCast(AI, PtrTy); - auto HandleLifetime = [&](IntrinsicInst *II) { // Set the lifetime intrinsic to cover the whole alloca. This reduces the // set of assumptions we need to make about the lifetime. Without this we @@ -1473,14 +1471,13 @@ void HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo, // one set of start / end in any execution (i.e. the ends are not // reachable from each other), so this will not cause any problems. 
II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize)); - II->setArgOperand(1, AICast); }; llvm::for_each(Info.LifetimeStart, HandleLifetime); llvm::for_each(Info.LifetimeEnd, HandleLifetime); - AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) { + AI->replaceUsesWithIf(Replacement, [AILong](const Use &U) { auto *User = U.getUser(); - return User != AILong && User != AICast && !isa<LifetimeIntrinsic>(User); + return User != AILong && !isa<LifetimeIntrinsic>(User); }); memtag::annotateDebugRecords(Info, retagMask(N)); diff --git a/llvm/lib/Transforms/ObjCARC/CMakeLists.txt b/llvm/lib/Transforms/ObjCARC/CMakeLists.txt index 80867db..4274667 100644 --- a/llvm/lib/Transforms/ObjCARC/CMakeLists.txt +++ b/llvm/lib/Transforms/ObjCARC/CMakeLists.txt @@ -2,7 +2,6 @@ add_llvm_component_library(LLVMObjCARCOpts ObjCARC.cpp ObjCARCOpts.cpp ObjCARCExpand.cpp - ObjCARCAPElim.cpp ObjCARCContract.cpp DependencyAnalysis.cpp ProvenanceAnalysis.cpp diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp deleted file mode 100644 index dceb2eb..0000000 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp +++ /dev/null @@ -1,156 +0,0 @@ -//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines ObjC ARC optimizations. ARC stands for Automatic -/// Reference Counting and is a system for managing reference counts for objects -/// in Objective C. -/// -/// This specific file implements optimizations which remove extraneous -/// autorelease pools. -/// -/// WARNING: This file knows about certain library functions. It recognizes them -/// by name, and hardwires knowledge of their semantics. -/// -/// WARNING: This file knows about how certain Objective-C library functions are -/// used. Naive LLVM IR transformations which would otherwise be -/// behavior-preserving may break these assumptions. -/// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/ObjCARCAnalysisUtils.h" -#include "llvm/Analysis/ObjCARCInstKind.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/ObjCARC.h" - -using namespace llvm; -using namespace llvm::objcarc; - -#define DEBUG_TYPE "objc-arc-ap-elim" - -namespace { - -/// Interprocedurally determine if calls made by the given call site can -/// possibly produce autoreleases. -bool MayAutorelease(const CallBase &CB, unsigned Depth = 0) { - if (const Function *Callee = CB.getCalledFunction()) { - if (!Callee->hasExactDefinition()) - return true; - for (const BasicBlock &BB : *Callee) { - for (const Instruction &I : BB) - if (const CallBase *JCB = dyn_cast<CallBase>(&I)) - // This recursion depth limit is arbitrary. It's just great - // enough to cover known interesting testcases. 
- if (Depth < 3 && !JCB->onlyReadsMemory() && - MayAutorelease(*JCB, Depth + 1)) - return true; - } - return false; - } - - return true; -} - -bool OptimizeBB(BasicBlock *BB) { - bool Changed = false; - - Instruction *Push = nullptr; - for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { - switch (GetBasicARCInstKind(&Inst)) { - case ARCInstKind::AutoreleasepoolPush: - Push = &Inst; - break; - case ARCInstKind::AutoreleasepoolPop: - // If this pop matches a push and nothing in between can autorelease, - // zap the pair. - if (Push && cast<CallInst>(&Inst)->getArgOperand(0) == Push) { - Changed = true; - LLVM_DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop " - "autorelease pair:\n" - " Pop: " - << Inst << "\n" - << " Push: " << *Push - << "\n"); - Inst.eraseFromParent(); - Push->eraseFromParent(); - } - Push = nullptr; - break; - case ARCInstKind::CallOrUser: - if (MayAutorelease(cast<CallBase>(Inst))) - Push = nullptr; - break; - default: - break; - } - } - - return Changed; -} - -bool runImpl(Module &M) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!ModuleHasARC(M)) - return false; - // Find the llvm.global_ctors variable, as the first step in - // identifying the global constructors. In theory, unnecessary autorelease - // pools could occur anywhere, but in practice it's pretty rare. Global - // ctors are a place where autorelease pools get inserted automatically, - // so it's pretty common for them to be unnecessary, and it's pretty - // profitable to eliminate them. - GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); - if (!GV) - return false; - - assert(GV->hasDefinitiveInitializer() && - "llvm.global_ctors is uncooperative!"); - - bool Changed = false; - - // Dig the constructor functions out of GV's initializer. - ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); - for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end(); - OI != OE; ++OI) { - Value *Op = *OI; - // llvm.global_ctors is an array of three-field structs where the second - // members are constructor functions. - Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1)); - // If the user used a constructor function with the wrong signature and - // it got bitcasted or whatever, look the other way. - if (!F) - continue; - // Only look at function definitions. - if (F->isDeclaration()) - continue; - // Only look at functions with one basic block. - if (std::next(F->begin()) != F->end()) - continue; - // Ok, a single-block constructor function definition. Try to optimize it. - Changed |= OptimizeBB(&F->front()); - } - - return Changed; -} - -} // namespace - -PreservedAnalyses ObjCARCAPElimPass::run(Module &M, ModuleAnalysisManager &AM) { - if (!runImpl(M)) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - return PA; -} diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 84d1c0b..9220abb 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1593,11 +1593,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, // since both llvm.lifetime.start and llvm.lifetime.end intrinsics // practically fill all the bytes of the alloca with an undefined // value, although conceptually marked as alive/dead. 
- int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue(); - if (Size < 0 || Size == DestSize) { - LifetimeMarkers.push_back(UI); - continue; - } + LifetimeMarkers.push_back(UI); + continue; } AAMetadataInstrs.insert(UI); @@ -1614,9 +1611,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, return true; }; - // Check that dest has no Mod/Ref, from the alloca to the Store, except full - // size lifetime intrinsics. And collect modref inst for the reachability - // check. + // Check that dest has no Mod/Ref, from the alloca to the Store. And collect + // modref inst for the reachability check. ModRefInfo DestModRef = ModRefInfo::NoModRef; MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size)); SmallVector<BasicBlock *, 8> ReachabilityWorklist; diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index a69d649..44e63a0 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/UniformityAnalysis.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -128,6 +129,7 @@ struct PredInfo { using BBPredicates = DenseMap<BasicBlock *, PredInfo>; using PredMap = DenseMap<BasicBlock *, BBPredicates>; using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; +using Val2BBMap = DenseMap<Value *, BasicBlock *>; // A traits type that is intended to be used in graph algorithms. The graph // traits starts at an entry node, and traverses the RegionNodes that are in @@ -279,7 +281,7 @@ class StructurizeCFG { ConstantInt *BoolTrue; ConstantInt *BoolFalse; Value *BoolPoison; - + const TargetTransformInfo *TTI; Function *Func; Region *ParentRegion; @@ -301,8 +303,12 @@ class StructurizeCFG { PredMap LoopPreds; BranchVector LoopConds; + Val2BBMap HoistedValues; + RegionNode *PrevNode; + void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB); + void orderNodes(); void analyzeLoops(RegionNode *N); @@ -332,6 +338,8 @@ class StructurizeCFG { void simplifyAffectedPhis(); + void simplifyHoistedPhis(); + DebugLoc killTerminator(BasicBlock *BB); void changeExit(RegionNode *Node, BasicBlock *NewExit, @@ -359,7 +367,7 @@ class StructurizeCFG { public: void init(Region *R); - bool run(Region *R, DominatorTree *DT); + bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI); bool makeUniformRegion(Region *R, UniformityInfo &UA); }; @@ -385,8 +393,11 @@ public: if (SCFG.makeUniformRegion(R, UA)) return false; } + Function *F = R->getEntry()->getParent(); + const TargetTransformInfo *TTI = + &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F); DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - return SCFG.run(R, DT); + return SCFG.run(R, DT, TTI); } StringRef getPassName() const override { return "Structurize control flow"; } @@ -394,7 +405,9 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { if (SkipUniformRegions) AU.addRequired<UniformityInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); RegionPass::getAnalysisUsage(AU); @@ -403,6 +416,34 @@ public: } // end anonymous namespace +/// Checks whether an instruction is 
a zero-cost instruction and checks if the +/// operands are from a different BB. If so, this instruction can be coalesced +/// if it is hoisted to the predecessor block. So, this returns true. +static bool isHoistableInstruction(Instruction *I, BasicBlock *BB, + const TargetTransformInfo *TTI) { + if (I->getParent() != BB || isa<PHINode>(I)) + return false; + + // If the instruction is not a zero cost instruction, return false. + auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); + InstructionCost::CostType CostVal = + Cost.isValid() + ? Cost.getValue() + : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive; + if (CostVal != 0) + return false; + + // Check if any operands are instructions defined in the same block. + for (auto &Op : I->operands()) { + if (auto *OpI = dyn_cast<Instruction>(Op)) { + if (OpI->getParent() == BB) + return false; + } + } + + return true; +} + char StructurizeCFGLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", @@ -413,6 +454,39 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", "Structurize the CFG", false, false) +/// Structurization can introduce unnecessary VGPR copies due to register +/// coalescing interference. For example, if the Else block has a zero-cost +/// instruction and the Then block modifies the VGPR value, only one value is +/// live at a time in the merge block before structurization. After structurization, +/// the coalescer may incorrectly treat the Then value as live in the Else block +/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies. +/// +/// This function examines phi nodes whose incoming values are zero-cost +/// instructions in the Else block. It identifies such values that can be safely +/// hoisted and moves them to the nearest common dominator of Then and Else +/// blocks. A follow-up function after setting PhiNodes assigns the hoisted +/// value to poison phi nodes along the if→flow edge, aiding register coalescing +/// and minimizing unnecessary live ranges. +void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, + BasicBlock *ThenBB) { + + BasicBlock *ElseSucc = ElseBB->getSingleSuccessor(); + BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB); + + if (!ElseSucc || !CommonDominator) + return; + Instruction *Term = CommonDominator->getTerminator(); + for (PHINode &Phi : ElseSucc->phis()) { + Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB); + auto *Inst = dyn_cast<Instruction>(ElseVal); + if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI)) + continue; + Inst->removeFromParent(); + Inst->insertInto(CommonDominator, Term->getIterator()); + HoistedValues[Inst] = CommonDominator; + } +} + /// Build up the general order of nodes, by performing a topological sort of the /// parent region's nodes, while ensuring that there is no outer cycle node /// between any two inner cycle nodes. 
@@ -535,7 +609,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { BasicBlock *Other = Term->getSuccessor(!i); if (Visited.count(Other) && !Loops.count(Other) && !Pred.count(Other) && !Pred.count(P)) { - + hoistZeroCostElseBlockPhiValues(Succ, Other); Pred[Other] = {BoolFalse, std::nullopt}; Pred[P] = {BoolTrue, std::nullopt}; continue; @@ -891,6 +965,44 @@ void StructurizeCFG::setPhiValues() { AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); } +/// Updates PHI nodes after hoisted zero cost instructions by replacing poison +/// entries on Flow nodes with the appropriate hoisted values +void StructurizeCFG::simplifyHoistedPhis() { + for (WeakVH VH : AffectedPhis) { + PHINode *Phi = dyn_cast_or_null<PHINode>(VH); + if (!Phi || Phi->getNumIncomingValues() != 2) + continue; + + for (int i = 0; i < 2; i++) { + Value *V = Phi->getIncomingValue(i); + auto BBIt = HoistedValues.find(V); + + if (BBIt == HoistedValues.end()) + continue; + + Value *OtherV = Phi->getIncomingValue(!i); + PHINode *OtherPhi = dyn_cast<PHINode>(OtherV); + if (!OtherPhi) + continue; + + int PoisonValBBIdx = -1; + for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) { + if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i))) + continue; + PoisonValBBIdx = i; + break; + } + if (PoisonValBBIdx == -1 || + !DT->dominates(BBIt->second, + OtherPhi->getIncomingBlock(PoisonValBBIdx))) + continue; + + OtherPhi->setIncomingValue(PoisonValBBIdx, V); + Phi->setIncomingValue(i, OtherV); + } + } +} + void StructurizeCFG::simplifyAffectedPhis() { bool Changed; do { @@ -1283,12 +1395,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { } /// Run the transformation for each region found -bool StructurizeCFG::run(Region *R, DominatorTree *DT) { +bool StructurizeCFG::run(Region *R, DominatorTree *DT, + const TargetTransformInfo *TTI) { if (R->isTopLevelRegion()) return false; this->DT = DT; - + this->TTI = TTI; Func = R->getEntry()->getParent(); assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); @@ -1300,6 +1413,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { insertConditions(false); insertConditions(true); setPhiValues(); + simplifyHoistedPhis(); simplifyConditions(); simplifyAffectedPhis(); rebuildSSA(); @@ -1349,7 +1463,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F, bool Changed = false; DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); auto &RI = AM.getResult<RegionInfoAnalysis>(F); - + TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F); UniformityInfo *UI = nullptr; if (SkipUniformRegions) UI = &AM.getResult<UniformityInfoAnalysis>(F); @@ -1368,7 +1482,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F, continue; } - Changed |= SCFG.run(R, DT); + Changed |= SCFG.run(R, DT, TTI); } if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9c9201c..6616e61f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1359,9 +1359,7 @@ public: return; // Override EVL styles if needed. // FIXME: Investigate opportunity for fixed vector factor. - // FIXME: Support interleave accesses. 
bool EVLIsLegal = UserIC <= 1 && IsScalableVF && - !InterleaveInfo.hasGroups() && TTI.hasActiveVectorLength() && !EnableVPlanNativePath; if (EVLIsLegal) return; @@ -2023,6 +2021,9 @@ public: /// Retrieves the MemCheckCond and MemCheckBlock that were generated as IR /// outside VPlan. std::pair<Value *, BasicBlock *> getMemRuntimeChecks() { + using namespace llvm::PatternMatch; + if (MemRuntimeCheckCond && match(MemRuntimeCheckCond, m_ZeroInt())) + return {nullptr, nullptr}; return {MemRuntimeCheckCond, MemCheckBlock}; } @@ -7278,6 +7279,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader()); VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE); VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType()); + VPlanTransforms::removeBranchOnConst(BestVPlan); VPlanTransforms::narrowInterleaveGroups( BestVPlan, BestVF, TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)); @@ -10074,12 +10076,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Get user vectorization factor and interleave count. ElementCount UserVF = Hints.getWidth(); unsigned UserIC = Hints.getInterleave(); - if (LVL.hasUncountableEarlyExit() && UserIC != 1) { - UserIC = 1; - reportVectorizationInfo("Interleaving not supported for loops " - "with uncountable early exits", - "InterleaveEarlyExitDisabled", ORE, L); - } // Plan how to best vectorize. LVP.plan(UserVF, UserIC); @@ -10097,9 +10093,20 @@ bool LoopVectorizePass::processLoop(Loop *L) { unsigned SelectedIC = std::max(IC, UserIC); // Optimistically generate runtime checks if they are needed. Drop them if // they turn out to not be profitable. - if (VF.Width.isVector() || SelectedIC > 1) + if (VF.Width.isVector() || SelectedIC > 1) { Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC); + // Bail out early if either the SCEV or memory runtime checks are known to + // fail. In that case, the vector loop would never execute. + using namespace llvm::PatternMatch; + if (Checks.getSCEVChecks().first && + match(Checks.getSCEVChecks().first, m_One())) + return false; + if (Checks.getMemRuntimeChecks().first && + match(Checks.getMemRuntimeChecks().first, m_One())) + return false; + } + // Check if it is profitable to vectorize with runtime checks. bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled; @@ -10230,6 +10237,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1), ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan); + // TODO: Move to general VPlan pipeline once epilogue loops are also + // supported. + VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount, + BestPlan, VF.Width, IC, PSE); + LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false); ORE->emit([&]() { @@ -10297,6 +10309,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, VF.MinProfitableTripCount, IC, &CM, BFI, PSI, Checks, BestPlan); + // TODO: Move to general VPlan pipeline once epilogue loops are also + // supported. 
+ VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount, + BestPlan, VF.Width, IC, PSE); + LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0d0b342..593868f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -206,6 +206,12 @@ static cl::opt<bool> VectorizeNonPowerOf2( "slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden, cl::desc("Try to vectorize with non-power-of-2 number of elements.")); +/// Enables vectorization of copyable elements. +static cl::opt<bool> VectorizeCopyableElements( + "slp-copyable-elements", cl::init(true), cl::Hidden, + cl::desc("Try to replace values with the idempotent instructions for " + "better vectorization.")); + // Limit the number of alias checks. The limit is chosen so that // it has no negative effect on the llvm benchmarks. static const unsigned AliasedCheckLimit = 10; @@ -855,6 +861,13 @@ static std::optional<unsigned> getExtractIndex(const Instruction *E) { return *EI->idx_begin(); } +namespace llvm { +/// Checks if the specified value does not require scheduling. It does not +/// require scheduling if all operands and all users do not need to be scheduled +/// in the current basic block. +static bool doesNotNeedToBeScheduled(Value *V); +} // namespace llvm + namespace { /// \returns true if \p Opcode is allowed as part of the main/alternate /// instruction for SLP vectorization. @@ -957,6 +970,33 @@ class BinOpSameOpcodeHelper { return Instruction::Xor; llvm_unreachable("Cannot find interchangeable instruction."); } + + /// Return true if the instruction can be converted to \p Opcode. + bool hasCandidateOpcode(unsigned Opcode) const { + MaskType Candidate = Mask & SeenBefore; + switch (Opcode) { + case Instruction::Shl: + return Candidate & ShlBIT; + case Instruction::AShr: + return Candidate & AShrBIT; + case Instruction::Mul: + return Candidate & MulBIT; + case Instruction::Add: + return Candidate & AddBIT; + case Instruction::Sub: + return Candidate & SubBIT; + case Instruction::And: + return Candidate & AndBIT; + case Instruction::Or: + return Candidate & OrBIT; + case Instruction::Xor: + return Candidate & XorBIT; + default: + break; + } + llvm_unreachable("Cannot find interchangeable instruction."); + } + SmallVector<Value *> getOperand(const Instruction *To) const { unsigned ToOpcode = To->getOpcode(); unsigned FromOpcode = I->getOpcode(); @@ -1117,6 +1157,10 @@ public: AltOp.trySet(OpcodeInMaskForm, InterchangeableMask)); } unsigned getMainOpcode() const { return MainOp.getOpcode(); } + /// Checks if the list of potential opcodes includes \p Opcode. + bool hasCandidateOpcode(unsigned Opcode) const { + return MainOp.hasCandidateOpcode(Opcode); + } bool hasAltOp() const { return AltOp.I; } unsigned getAltOpcode() const { return hasAltOp() ? AltOp.getOpcode() : getMainOpcode(); } @@ -1152,6 +1196,8 @@ class InstructionsState { /// GetVectorCost. Instruction *MainOp = nullptr; Instruction *AltOp = nullptr; + /// Whether the instruction state represents copyable instructions. 
+ bool HasCopyables = false; public: Instruction *getMainOp() const { @@ -1190,9 +1236,11 @@ public: if (!I->isBinaryOp()) return nullptr; BinOpSameOpcodeHelper Converter(MainOp); - if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp()) - return MainOp; - return AltOp; + if (!Converter.add(I) || !Converter.add(MainOp)) + return nullptr; + if (Converter.hasAltOp() && !isAltShuffle()) + return nullptr; + return Converter.hasAltOp() ? AltOp : MainOp; } /// Checks if main/alt instructions are shift operations. @@ -1237,9 +1285,63 @@ public: explicit operator bool() const { return valid(); } InstructionsState() = delete; - InstructionsState(Instruction *MainOp, Instruction *AltOp) - : MainOp(MainOp), AltOp(AltOp) {} + InstructionsState(Instruction *MainOp, Instruction *AltOp, + bool HasCopyables = false) + : MainOp(MainOp), AltOp(AltOp), HasCopyables(HasCopyables) {} static InstructionsState invalid() { return {nullptr, nullptr}; } + + bool isCopyableElement(Value *V) const { + assert(valid() && "InstructionsState is invalid."); + if (!HasCopyables) + return false; + if (isAltShuffle() || getOpcode() == Instruction::GetElementPtr) + return false; + auto *I = dyn_cast<Instruction>(V); + if (!I) + return !isa<PoisonValue>(V); + if (I->getParent() != MainOp->getParent() && + (!isVectorLikeInstWithConstOps(I) || + !isVectorLikeInstWithConstOps(MainOp))) + return true; + if (I->getOpcode() == MainOp->getOpcode()) + return false; + if (!I->isBinaryOp()) + return true; + BinOpSameOpcodeHelper Converter(MainOp); + return !Converter.add(I) || !Converter.add(MainOp) || + Converter.hasAltOp() || !Converter.hasCandidateOpcode(getOpcode()); + } + + /// Checks if the value is non-schedulable. + bool isNonSchedulable(Value *V) const { + assert(valid() && "InstructionsState is invalid."); + auto *I = dyn_cast<Instruction>(V); + if (!HasCopyables) + return !I || isa<PHINode>(I) || isVectorLikeInstWithConstOps(I) || + doesNotNeedToBeScheduled(V); + // MainOp for copyables always schedulable to correctly identify + // non-schedulable copyables. + if (isCopyableElement(V)) { + auto IsNonSchedulableCopyableElement = [this](Value *V) { + auto *I = dyn_cast<Instruction>(V); + return !I || isa<PHINode>(I) || I->getParent() != MainOp->getParent() || + (doesNotNeedToBeScheduled(I) && + // If the copyable instructions comes after MainOp + // (non-schedulable, but used in the block) - cannot vectorize + // it, will possibly generate use before def. + (isVectorLikeInstWithConstOps(I) || !MainOp->comesBefore(I))); + }; + + return IsNonSchedulableCopyableElement(V); + } + return !I || isa<PHINode>(I) || isVectorLikeInstWithConstOps(I) || + doesNotNeedToBeScheduled(V); + } + + bool areInstructionsWithCopyableElements() const { + assert(valid() && "InstructionsState is invalid."); + return HasCopyables; + } }; std::pair<Instruction *, SmallVector<Value *>> @@ -1917,6 +2019,7 @@ public: CompressEntryToData.clear(); ExternalUses.clear(); ExternalUsesAsOriginalScalar.clear(); + ExternalUsesWithNonUsers.clear(); for (auto &Iter : BlocksSchedules) { BlockScheduling *BS = Iter.second.get(); BS->clear(); @@ -2899,9 +3002,6 @@ public: for (OperandDataVec &Ops : OpsVec) Ops.resize(NumLanes); for (unsigned Lane : seq<unsigned>(NumLanes)) { - Value *V = VL[Lane]; - assert((isa<Instruction>(V) || isa<PoisonValue>(V)) && - "Expected instruction or poison value"); // Our tree has just 3 nodes: the root and two operands. // It is therefore trivial to get the APO. 
We only need to check the // opcode of V and whether the operand at OpIdx is the LHS or RHS @@ -2912,17 +3012,24 @@ public: // Since operand reordering is performed on groups of commutative // operations or alternating sequences (e.g., +, -), we can safely tell // the inverse operations by checking commutativity. - if (isa<PoisonValue>(V)) { + auto *I = dyn_cast<Instruction>(VL[Lane]); + if (!I && isa<PoisonValue>(VL[Lane])) { for (unsigned OpIdx : seq<unsigned>(NumOperands)) OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], true, false}; continue; } - auto [SelectedOp, Ops] = convertTo(cast<Instruction>(V), S); - // We cannot check commutativity by the converted instruction - // (SelectedOp) because isCommutative also examines def-use - // relationships. - bool IsInverseOperation = - !isCommutative(SelectedOp, cast<Instruction>(V)); + bool IsInverseOperation = false; + if (S.isCopyableElement(VL[Lane])) { + // The value is a copyable element. + IsInverseOperation = !isCommutative(MainOp); + } else { + assert(I && "Expected instruction"); + auto [SelectedOp, Ops] = convertTo(I, S); + // We cannot check commutativity by the converted instruction + // (SelectedOp) because isCommutative also examines def-use + // relationships. + IsInverseOperation = !isCommutative(SelectedOp, I); + } for (unsigned OpIdx : seq<unsigned>(ArgSize)) { bool APO = (OpIdx == 0) ? false : IsInverseOperation; OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], APO, false}; @@ -3792,6 +3899,9 @@ private: /// reordering of operands during buildTreeRec() and vectorizeTree(). SmallVector<ValueList, 2> Operands; + /// Copyable elements of the entry node. + SmallPtrSet<const Value *, 4> CopyableElements; + /// MainOp and AltOp are recorded inside. S should be obtained from /// newTreeEntry. InstructionsState S = InstructionsState::invalid(); @@ -3820,11 +3930,7 @@ private: void setInterleave(unsigned Factor) { InterleaveFactor = Factor; } /// Marks the node as one that does not require scheduling. - void setDoesNotNeedToSchedule() { - assert(::doesNotNeedToSchedule(Scalars) && - "Expected to not need scheduling"); - DoesNotNeedToSchedule = true; - } + void setDoesNotNeedToSchedule() { DoesNotNeedToSchedule = true; } /// Returns true if the node is marked as one that does not require /// scheduling. bool doesNotNeedToSchedule() const { return DoesNotNeedToSchedule; } @@ -3896,6 +4002,20 @@ private: bool hasState() const { return S.valid(); } + /// Add \p V to the list of copyable elements. + void addCopyableElement(Value *V) { + assert(S.isCopyableElement(V) && "Not a copyable element."); + CopyableElements.insert(V); + } + + /// Returns true if \p V is a copyable element. + bool isCopyableElement(Value *V) const { + return CopyableElements.contains(V); + } + + /// Returns true if any scalar in the list is a copyable element. + bool hasCopyableElements() const { return !CopyableElements.empty(); } + /// When ReuseReorderShuffleIndices is empty it just returns position of \p /// V within vector of Scalars. Otherwise, try to remap on its reuse index. 
unsigned findLaneForValue(Value *V) const { @@ -3968,6 +4088,8 @@ private: for (Value *V : Scalars) dbgs().indent(2) << *V << "\n"; dbgs() << "State: "; + if (S && hasCopyableElements()) + dbgs() << "[[Copyable]] "; switch (State) { case Vectorize: if (InterleaveFactor > 0) { @@ -4145,12 +4267,20 @@ private: } } } else if (!Last->isGather()) { - if (doesNotNeedToSchedule(VL)) + if (isa<PHINode>(S.getMainOp()) || + isVectorLikeInstWithConstOps(S.getMainOp()) || + (!S.areInstructionsWithCopyableElements() && + doesNotNeedToSchedule(VL)) || + all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); })) Last->setDoesNotNeedToSchedule(); SmallPtrSet<Value *, 4> Processed; for (Value *V : VL) { if (isa<PoisonValue>(V)) continue; + if (S.isCopyableElement(V)) { + Last->addCopyableElement(V); + continue; + } auto It = ScalarToTreeEntries.find(V); if (It == ScalarToTreeEntries.end()) { ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last); @@ -4162,16 +4292,14 @@ private: } } // Update the scheduler bundle to point to this TreeEntry. - assert((!Bundle.getBundle().empty() || isa<PHINode>(S.getMainOp()) || - isVectorLikeInstWithConstOps(S.getMainOp()) || - Last->doesNotNeedToSchedule()) && + assert((!Bundle.getBundle().empty() || Last->doesNotNeedToSchedule()) && "Bundle and VL out of sync"); if (!Bundle.getBundle().empty()) { #if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS) auto *BundleMember = Bundle.getBundle().begin(); SmallPtrSet<Value *, 4> Processed; for (Value *V : VL) { - if (doesNotNeedToBeScheduled(V) || !Processed.insert(V).second) + if (S.isNonSchedulable(V) || !Processed.insert(V).second) continue; ++BundleMember; } @@ -4280,7 +4408,8 @@ private: /// in general. ScalarsVectorizationLegality getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth, - const EdgeInfo &UserTreeIdx) const; + const EdgeInfo &UserTreeIdx, + bool TryCopyableElementsVectorization) const; /// Checks if the specified list of the instructions/values can be vectorized /// and fills required data before actual scheduling of the instructions. @@ -4420,6 +4549,10 @@ private: /// extractelement instructions. SmallPtrSet<Value *, 4> ExternalUsesAsOriginalScalar; + /// A list of scalars to be extracted without a specific user because of too many + /// uses. + SmallPtrSet<Value *, 4> ExternalUsesWithNonUsers; + /// Values used only by @llvm.assume calls. SmallPtrSet<const Value *, 32> EphValues; @@ -4996,7 +5129,8 @@ private: /// Build a bundle from the ScheduleData nodes corresponding to the /// scalar instruction for each lane. - ScheduleBundle &buildBundle(ArrayRef<Value *> VL); + ScheduleBundle &buildBundle(ArrayRef<Value *> VL, + const InstructionsState &S); /// Checks if a bundle of instructions can be scheduled, i.e. has no /// cyclic dependencies. 
This is only a dry-run, no instructions are @@ -6727,7 +6861,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom, return std::move(ResOrder); } if (TE.State == TreeEntry::StridedVectorize && !TopToBottom && - (!TE.UserTreeIndex || + (!TE.UserTreeIndex || !TE.UserTreeIndex.UserTE->hasState() || !Instruction::isBinaryOp(TE.UserTreeIndex.UserTE->getOpcode())) && (TE.ReorderIndices.empty() || isReverseOrder(TE.ReorderIndices))) return std::nullopt; @@ -7038,10 +7172,11 @@ bool BoUpSLP::isProfitableToReorder() const { VectorizableTree.front()->getOpcode() == Instruction::ICmp))) && VectorizableTree.front()->ReorderIndices.empty()) { // Check if the tree has only single store and single (unordered) load node, - // other nodes are phis or geps/binops, combined with phis, and/orsingle + // other nodes are phis or geps/binops, combined with phis, and/or single // gather load node bool HasPhis = false; - if (VectorizableTree.front()->getOpcode() == Instruction::PHI && + if (VectorizableTree.front()->hasState() && + VectorizableTree.front()->getOpcode() == Instruction::PHI && VectorizableTree.front()->Scalars.size() == TinyVF && VectorizableTree.front()->getNumOperands() > PhiOpsLimit) return false; @@ -7049,6 +7184,8 @@ bool BoUpSLP::isProfitableToReorder() const { unsigned GatherLoads = 0; for (const std::unique_ptr<TreeEntry> &TE : ArrayRef(VectorizableTree).drop_front()) { + if (TE->State == TreeEntry::SplitVectorize) + continue; if (!TE->hasState()) { if (all_of(TE->Scalars, IsaPred<Constant, PHINode>) || all_of(TE->Scalars, IsaPred<BinaryOperator, PHINode>)) @@ -7072,7 +7209,10 @@ bool BoUpSLP::isProfitableToReorder() const { if (TE->getOpcode() == Instruction::GetElementPtr || Instruction::isBinaryOp(TE->getOpcode())) continue; - if (TE->getOpcode() != Instruction::PHI) + if (TE->getOpcode() != Instruction::PHI && + (!TE->hasCopyableElements() || + static_cast<unsigned>(count_if(TE->Scalars, IsaPred<PHINode>)) < + TE->Scalars.size() / 2)) return true; if (VectorizableTree.front()->Scalars.size() == TinyVF && TE->getNumOperands() > PhiOpsLimit) @@ -7860,7 +8000,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { } Instruction *BoUpSLP::getRootEntryInstruction(const TreeEntry &Entry) const { - if ((Entry.getOpcode() == Instruction::Store || + if (Entry.hasState() && + (Entry.getOpcode() == Instruction::Store || Entry.getOpcode() == Instruction::Load) && Entry.State == TreeEntry::StridedVectorize && !Entry.ReorderIndices.empty() && isReverseOrder(Entry.ReorderIndices)) @@ -7870,7 +8011,9 @@ Instruction *BoUpSLP::getRootEntryInstruction(const TreeEntry &Entry) const { void BoUpSLP::buildExternalUses( const ExtraValueToDebugLocsMap &ExternallyUsedValues) { + const size_t NumVectScalars = ScalarToTreeEntries.size() + 1; DenseMap<Value *, unsigned> ScalarToExtUses; + SmallPtrSet<Value *, 4> ExternalUsers; // Collect the values that we need to extract from the tree. for (auto &TEPtr : VectorizableTree) { TreeEntry *Entry = TEPtr.get(); @@ -7882,13 +8025,24 @@ void BoUpSLP::buildExternalUses( // For each lane: for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; - if (!isa<Instruction>(Scalar)) + if (!isa<Instruction>(Scalar) || Entry->isCopyableElement(Scalar)) continue; + // All uses must be replaced already? No need to do it again. 
auto It = ScalarToExtUses.find(Scalar); if (It != ScalarToExtUses.end() && !ExternalUses[It->second].User) continue; + if (Scalar->hasNUsesOrMore(NumVectScalars)) { + unsigned FoundLane = Entry->findLaneForValue(Scalar); + LLVM_DEBUG(dbgs() << "SLP: Need to extract from lane " << FoundLane + << " from " << *Scalar << " for many users.\n"); + It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first; + ExternalUses.emplace_back(Scalar, nullptr, *Entry, FoundLane); + ExternalUsesWithNonUsers.insert(Scalar); + continue; + } + // Check if the scalar is externally used as an extra arg. const auto ExtI = ExternallyUsedValues.find(Scalar); if (ExtI != ExternallyUsedValues.end()) { @@ -7916,7 +8070,10 @@ void BoUpSLP::buildExternalUses( // Some in-tree scalars will remain as scalar in vectorized // instructions. If that is the case, the one in FoundLane will // be used. - if (all_of(UseEntries, [&](TreeEntry *UseEntry) { + if (!((Scalar->getType()->getScalarType()->isPointerTy() && + isa<LoadInst, StoreInst>(UserInst)) || + isa<CallInst>(UserInst)) || + all_of(UseEntries, [&](TreeEntry *UseEntry) { return UseEntry->State == TreeEntry::ScatterVectorize || !doesInTreeUserNeedToExtract( Scalar, getRootEntryInstruction(*UseEntry), TLI, @@ -7946,6 +8103,7 @@ void BoUpSLP::buildExternalUses( << ".\n"); It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first; ExternalUses.emplace_back(Scalar, U, *Entry, FoundLane); + ExternalUsesWithNonUsers.insert(Scalar); if (!U) break; } @@ -9612,7 +9770,8 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL, PoisonValue::get(UniqueValues.front()->getType())); // Check that extended with poisons operations are still valid for // vectorization (div/rem are not allowed). - if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) { + if (!S.areInstructionsWithCopyableElements() && + !getSameOpcode(PaddedUniqueValues, TLI).valid()) { LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); ReuseShuffleIndices.clear(); return false; @@ -9761,13 +9920,95 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL, } namespace { -/// Class accepts incoming list of values and generates the list of values -/// for scheduling and list of operands for the new nodes. +/// Class accepts incoming list of values, checks if it is able to model +/// "copyable" values as compatible operations, and generates the list of values +/// for scheduling and list of operands for the new nodes. class InstructionsCompatibilityAnalysis { DominatorTree &DT; const DataLayout &DL; const TargetTransformInfo &TTI; const TargetLibraryInfo &TLI; + unsigned MainOpcode = 0; + Instruction *MainOp = nullptr; + + /// Identifies the best candidate value, which represents the main opcode + /// operation. + /// Currently the best candidate is the Add instruction with the parent + /// block with the highest DFS incoming number (the block that dominates the others). + void findAndSetMainInstruction(ArrayRef<Value *> VL) { + BasicBlock *Parent = nullptr; + // Checks if the instruction has a supported opcode. 
+ auto IsSupportedOpcode = [](Instruction *I) { + return I && I->getOpcode() == Instruction::Add; + }; + SmallDenseSet<Value *, 8> Operands; + for (Value *V : VL) { + auto *I = dyn_cast<Instruction>(V); + if (!I) + continue; + if (!DT.isReachableFromEntry(I->getParent())) + continue; + if (!MainOp) { + MainOp = I; + Parent = I->getParent(); + Operands.insert(I->op_begin(), I->op_end()); + continue; + } + if (Parent == I->getParent()) { + if (!IsSupportedOpcode(MainOp)) + MainOp = I; + if (MainOp->getOpcode() == I->getOpcode() && + doesNotNeedToBeScheduled(MainOp) && !doesNotNeedToBeScheduled(I)) + MainOp = I; + Operands.insert(I->op_begin(), I->op_end()); + continue; + } + auto *NodeA = DT.getNode(Parent); + auto *NodeB = DT.getNode(I->getParent()); + assert(NodeA && "Should only process reachable instructions"); + assert(NodeB && "Should only process reachable instructions"); + assert((NodeA == NodeB) == + (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) && + "Different nodes should have different DFS numbers"); + if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) { + MainOp = I; + Parent = I->getParent(); + Operands.clear(); + Operands.insert(I->op_begin(), I->op_end()); + } + } + if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) { + MainOp = nullptr; + return; + } + MainOpcode = MainOp->getOpcode(); + } + + /// Returns the idempotent value for the \p MainOp with the detected \p + /// MainOpcode. For Add, returns 0. For Or, it should choose between false and + /// the operand itself, since V or V == V. + Value *selectBestIdempotentValue() const { + assert(MainOpcode == Instruction::Add && "Unsupported opcode"); + return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(), + !MainOp->isCommutative()); + } + + /// Returns the value and operands for the \p V, considering if it is an original + /// instruction and its actual operands should be returned, or it is a + /// copyable element and it should be represented as an idempotent instruction. + SmallVector<Value *> getOperands(const InstructionsState &S, Value *V) const { + if (isa<PoisonValue>(V)) + return {V, V}; + if (!S.isCopyableElement(V)) + return convertTo(cast<Instruction>(V), S).second; + switch (MainOpcode) { + case Instruction::Add: + return {V, selectBestIdempotentValue()}; + default: + break; + } + llvm_unreachable("Unsupported opcode"); + } /// Builds operands for the original instructions. void @@ -9928,22 +10169,165 @@ public: const TargetLibraryInfo &TLI) : DT(DT), DL(DL), TTI(TTI), TLI(TLI) {} + InstructionsState + buildInstructionsState(ArrayRef<Value *> VL, const BoUpSLP &R, + bool TryCopyableElementsVectorization, + bool WithProfitabilityCheck = false, + bool SkipSameCodeCheck = false) { + InstructionsState S = (SkipSameCodeCheck || !allSameBlock(VL)) + ? InstructionsState::invalid() + : getSameOpcode(VL, TLI); + if (S) + return S; + if (!VectorizeCopyableElements || !TryCopyableElementsVectorization) + return S; + findAndSetMainInstruction(VL); + if (!MainOp) + return InstructionsState::invalid(); + S = InstructionsState(MainOp, MainOp, /*HasCopyables=*/true); + // TODO: Remove this check once support for schedulable copyables is landed. + if (any_of(VL, [&](Value *V) { + return S.isCopyableElement(V) && !S.isNonSchedulable(V); + })) + return InstructionsState::invalid(); + + if (!WithProfitabilityCheck) + return S; + // Check if it is profitable to vectorize the instruction. 
+ SmallVector<BoUpSLP::ValueList> Operands = buildOperands(S, VL); + auto BuildCandidates = + [](SmallVectorImpl<std::pair<Value *, Value *>> &Candidates, Value *V1, + Value *V2) { + if (V1 != V2 && isa<PHINode>(V1)) + return; + auto *I1 = dyn_cast<Instruction>(V1); + auto *I2 = dyn_cast<Instruction>(V2); + if (I1 && I2 && I1->getOpcode() == I2->getOpcode() && + I1->getParent() != I2->getParent()) + return; + Candidates.emplace_back(V1, (I1 || I2) ? V2 : V1); + }; + if (VL.size() == 2) { + // Check if the operands allow better vectorization. + SmallVector<std::pair<Value *, Value *>, 4> Candidates1, Candidates2; + BuildCandidates(Candidates1, Operands[0][0], Operands[0][1]); + BuildCandidates(Candidates2, Operands[1][0], Operands[1][1]); + bool Res = !Candidates1.empty() && !Candidates2.empty() && + R.findBestRootPair(Candidates1) && + R.findBestRootPair(Candidates2); + if (!Res && isCommutative(MainOp)) { + Candidates1.clear(); + Candidates2.clear(); + BuildCandidates(Candidates1, Operands[0][0], Operands[1][1]); + BuildCandidates(Candidates2, Operands[1][0], Operands[0][1]); + Res = !Candidates1.empty() && !Candidates2.empty() && + R.findBestRootPair(Candidates1) && + R.findBestRootPair(Candidates2); + } + if (!Res) + return InstructionsState::invalid(); + return S; + } + assert(Operands.size() == 2 && "Unexpected number of operands!"); + unsigned CopyableNum = + count_if(VL, [&](Value *V) { return S.isCopyableElement(V); }); + if (CopyableNum < VL.size() / 2) + return S; + // Too many phi copyables - exit. + const unsigned Limit = VL.size() / 24; + if ((CopyableNum >= VL.size() - Limit || + (CopyableNum >= VL.size() - 1 && VL.size() > 4) || + CopyableNum >= MaxPHINumOperands) && + all_of(VL, [&](Value *V) { + return isa<PHINode>(V) || !S.isCopyableElement(V); + })) + return InstructionsState::invalid(); + // Check profitability if number of copyables > VL.size() / 2. + // 1. Reorder operands for better matching. + if (isCommutative(MainOp)) { + for (auto &Ops : Operands) { + // Make instructions the first operands. + if (!isa<Instruction>(Ops.front()) && isa<Instruction>(Ops.back())) { + std::swap(Ops.front(), Ops.back()); + continue; + } + // Make constants the second operands. + if (isa<Constant>(Ops.front())) { + std::swap(Ops.front(), Ops.back()); + continue; + } + } + } + // 2. Check, if operands can be vectorized. + if (count_if(Operands.back(), IsaPred<Instruction>) > 1) + return InstructionsState::invalid(); + auto CheckOperand = [&](ArrayRef<Value *> Ops) { + if (allConstant(Ops) || isSplat(Ops)) + return true; + // Check if it is "almost" splat, i.e. has >= 4 elements and only single + // one is different. + constexpr unsigned Limit = 4; + if (Operands.front().size() >= Limit) { + SmallDenseMap<const Value *, unsigned> Counters; + for (Value *V : Ops) { + if (isa<UndefValue>(V)) + continue; + ++Counters[V]; + } + if (Counters.size() == 2 && + any_of(Counters, [&](const std::pair<const Value *, unsigned> &C) { + return C.second == 1; + })) + return true; + } + // First operand not a constant or splat? Last attempt - check for + // potential vectorization. 
+ InstructionsCompatibilityAnalysis Analysis(DT, DL, TTI, TLI); + InstructionsState OpS = Analysis.buildInstructionsState( + Ops, R, /*TryCopyableElementsVectorization=*/true); + if (!OpS || (OpS.getOpcode() == Instruction::PHI && !allSameBlock(Ops))) + return false; + unsigned CopyableNum = + count_if(Ops, [&](Value *V) { return OpS.isCopyableElement(V); }); + return CopyableNum <= VL.size() / 2; + }; + if (!CheckOperand(Operands.front())) + return InstructionsState::invalid(); + + return S; + } + SmallVector<BoUpSLP::ValueList> buildOperands(const InstructionsState &S, ArrayRef<Value *> VL) { assert(S && "Invalid state!"); SmallVector<BoUpSLP::ValueList> Operands; - buildOriginalOperands(S, VL, Operands); + if (S.areInstructionsWithCopyableElements()) { + MainOp = S.getMainOp(); + MainOpcode = S.getOpcode(); + Operands.assign(MainOp->getNumOperands(), + BoUpSLP::ValueList(VL.size(), nullptr)); + for (auto [Idx, V] : enumerate(VL)) { + SmallVector<Value *> OperandsForValue = getOperands(S, V); + for (auto [OperandIdx, Operand] : enumerate(OperandsForValue)) + Operands[OperandIdx][Idx] = Operand; + } + } else { + buildOriginalOperands(S, VL, Operands); + } return Operands; } }; } // namespace -BoUpSLP::ScalarsVectorizationLegality -BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth, - const EdgeInfo &UserTreeIdx) const { +BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality( + ArrayRef<Value *> VL, unsigned Depth, const EdgeInfo &UserTreeIdx, + bool TryCopyableElementsVectorization) const { assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); - InstructionsState S = getSameOpcode(VL, *TLI); + InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI); + InstructionsState S = Analysis.buildInstructionsState( + VL, *this, TryCopyableElementsVectorization, + /*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization); // Don't go into catchswitch blocks, which can happen with PHIs. // Such blocks can only have PHIs and the catchswitch. There is no @@ -10066,7 +10450,7 @@ BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth, bool IsScatterVectorizeUserTE = UserTreeIdx.UserTE && UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; - bool AreAllSameBlock = S && allSameBlock(VL); + bool AreAllSameBlock = S.valid(); bool AreScatterAllGEPSameBlock = (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() && VL.size() > 2 && @@ -10091,12 +10475,18 @@ BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth, NotProfitableForVectorization(VL)) { if (!S) { LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to " - "C,S,B,O, small shuffle. \n"); + "C,S,B,O, small shuffle. \n"; + dbgs() << "["; + interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; }); + dbgs() << "]\n"); return ScalarsVectorizationLegality(S, /*IsLegal=*/false, /*TryToFindDuplicates=*/true, /*TrySplitVectorize=*/true); } - LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. 
\n"; + dbgs() << "["; + interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; }); + dbgs() << "]\n"); return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } @@ -10242,9 +10632,29 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth, return true; }; - ScalarsVectorizationLegality Legality = - getScalarsVectorizationLegality(VL, Depth, UserTreeIdx); - const InstructionsState &S = Legality.getInstructionsState(); + auto AreOnlyConstsWithPHIs = [](ArrayRef<Value *> VL) { + bool AreConsts = false; + for (Value *V : VL) { + if (isa<PoisonValue>(V)) + continue; + if (isa<Constant>(V)) { + AreConsts = true; + continue; + } + if (!isa<PHINode>(V)) + return false; + } + return AreConsts; + }; + if (AreOnlyConstsWithPHIs(VL)) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to all constants and PHIs.\n"); + newGatherTreeEntry(VL, InstructionsState::invalid(), UserTreeIdx); + return; + } + + ScalarsVectorizationLegality Legality = getScalarsVectorizationLegality( + VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false); + InstructionsState S = Legality.getInstructionsState(); if (!Legality.isLegal()) { if (Legality.trySplitVectorize()) { auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL); @@ -10252,11 +10662,18 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth, if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp))) return; } - if (Legality.tryToFindDuplicates()) - tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx); + if (!S) + Legality = getScalarsVectorizationLegality( + VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/true); + if (!Legality.isLegal()) { + if (Legality.tryToFindDuplicates()) + tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, + UserTreeIdx); - newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); - return; + newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); + return; + } + S = Legality.getInstructionsState(); } // FIXME: investigate if there are profitable cases for VL.size() <= 4. @@ -13024,7 +13441,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, assert(E->getOpcode() && ((allSameType(VL) && allSameBlock(VL)) || (E->getOpcode() == Instruction::GetElementPtr && - E->getMainOp()->getType()->isPointerTy())) && + E->getMainOp()->getType()->isPointerTy()) || + E->hasCopyableElements()) && "Invalid VL"); Instruction *VL0 = E->getMainOp(); unsigned ShuffleOrOp = @@ -13036,6 +13454,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, SmallBitVector UsedScalars(Sz, false); for (unsigned I = 0; I < Sz; ++I) { if (isa<Instruction>(UniqueValues[I]) && + !E->isCopyableElement(UniqueValues[I]) && getTreeEntries(UniqueValues[I]).front() == E) continue; UsedScalars.set(I); @@ -14075,15 +14494,45 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { // If the tree contains only phis, buildvectors, split nodes and // small nodes with reuses, we can skip it. 
+ SmallVector<const TreeEntry *> StoreLoadNodes; + unsigned NumGathers = 0; + constexpr int LimitTreeSize = 36; if (!ForReduction && !SLPCostThreshold.getNumOccurrences() && - all_of(VectorizableTree, [](const std::unique_ptr<TreeEntry> &TE) { - return TE->State == TreeEntry::SplitVectorize || - (TE->isGather() && - none_of(TE->Scalars, IsaPred<ExtractElementInst>)) || - (TE->hasState() && (TE->getOpcode() == Instruction::PHI || - (!TE->ReuseShuffleIndices.empty() && - TE->Scalars.size() == 2))); - })) + all_of(VectorizableTree, + [&](const std::unique_ptr<TreeEntry> &TE) { + if (!TE->isGather() && TE->hasState() && + (TE->getOpcode() == Instruction::Load || + TE->getOpcode() == Instruction::Store)) { + StoreLoadNodes.push_back(TE.get()); + return true; + } + if (TE->isGather()) + ++NumGathers; + return TE->State == TreeEntry::SplitVectorize || + (TE->Idx == 0 && TE->Scalars.size() == 2 && + TE->hasState() && TE->getOpcode() == Instruction::ICmp && + VectorizableTree.size() > LimitTreeSize) || + (TE->isGather() && + none_of(TE->Scalars, IsaPred<ExtractElementInst>)) || + (TE->hasState() && + (TE->getOpcode() == Instruction::PHI || + (TE->hasCopyableElements() && + static_cast<unsigned>(count_if( + TE->Scalars, IsaPred<PHINode, Constant>)) >= + TE->Scalars.size() / 2) || + ((!TE->ReuseShuffleIndices.empty() || + !TE->ReorderIndices.empty() || TE->isAltShuffle()) && + TE->Scalars.size() == 2))); + }) && + (StoreLoadNodes.empty() || + (VectorizableTree.size() > LimitTreeSize * StoreLoadNodes.size() && + (NumGathers > 0 || none_of(StoreLoadNodes, [&](const TreeEntry *TE) { + return TE->getOpcode() == Instruction::Store || + all_of(TE->Scalars, [&](Value *V) { + return !isa<LoadInst>(V) || + areAllUsersVectorized(cast<Instruction>(V)); + }); + }))))) return true; // We can vectorize the tree if its size is greater than or equal to the @@ -14826,6 +15275,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals, bool IsProfitablePHIUser = (KeepScalar || (ScalarCost - ExtraCost <= TTI::TCC_Basic && VectorizableTree.front()->Scalars.size() > 2)) && + VectorizableTree.front()->hasState() && VectorizableTree.front()->getOpcode() == Instruction::PHI && !Inst->hasNUsesOrMore(UsesLimit) && none_of(Inst->users(), @@ -15276,7 +15726,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( const BasicBlock *TEInsertBlock = nullptr; // Main node of PHI entries keeps the correct order of operands/incoming // blocks. - if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp()); + if (auto *PHI = dyn_cast_or_null<PHINode>( + TEUseEI.UserTE->hasState() ? TEUseEI.UserTE->getMainOp() : nullptr); PHI && TEUseEI.UserTE->State != TreeEntry::SplitVectorize) { TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx); TEInsertPt = TEInsertBlock->getTerminator(); @@ -15375,7 +15826,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( "Expected only single user of a gather node."); const EdgeInfo &UseEI = TEPtr->UserTreeIndex; - PHINode *UserPHI = UseEI.UserTE->State != TreeEntry::SplitVectorize + PHINode *UserPHI = (UseEI.UserTE->State != TreeEntry::SplitVectorize && + UseEI.UserTE->hasState()) ? 
dyn_cast<PHINode>(UseEI.UserTE->getMainOp()) : nullptr; Instruction *InsertPt = @@ -15388,7 +15840,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( TEUseEI.UserTE->isAltShuffle()) && all_of(TEUseEI.UserTE->Scalars, isUsedOutsideBlock)) { if (UseEI.UserTE->State != TreeEntry::Vectorize || - (UseEI.UserTE->getOpcode() == Instruction::PHI && + (UseEI.UserTE->hasState() && + UseEI.UserTE->getOpcode() == Instruction::PHI && !UseEI.UserTE->isAltShuffle()) || !all_of(UseEI.UserTE->Scalars, isUsedOutsideBlock)) continue; @@ -16009,25 +16462,32 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { Instruction *Res = nullptr; // Get the basic block this bundle is in. All instructions in the bundle // should be in this block (except for extractelement-like instructions with - // constant indices or gathered loads). - auto *Front = E->getMainOp(); + // constant indices or gathered loads or copyables). + Instruction *Front; + unsigned Opcode; + if (E->hasState()) { + Front = E->getMainOp(); + Opcode = E->getOpcode(); + } else { + Front = cast<Instruction>(*find_if(E->Scalars, IsaPred<Instruction>)); + Opcode = Front->getOpcode(); + } auto *BB = Front->getParent(); - assert(((GatheredLoadsEntriesFirst.has_value() && - E->getOpcode() == Instruction::Load && E->isGather() && - E->Idx < *GatheredLoadsEntriesFirst) || - E->State == TreeEntry::SplitVectorize || - all_of(E->Scalars, - [=](Value *V) -> bool { - if (E->getOpcode() == Instruction::GetElementPtr && - !isa<GetElementPtrInst>(V)) - return true; - auto *I = dyn_cast<Instruction>(V); - return !I || !E->getMatchingMainOpOrAltOp(I) || - I->getParent() == BB || - isVectorLikeInstWithConstOps(I); - })) && - "Expected gathered loads or GEPs or instructions from same basic " - "block."); + assert( + ((GatheredLoadsEntriesFirst.has_value() && Opcode == Instruction::Load && + E->isGather() && E->Idx < *GatheredLoadsEntriesFirst) || + E->State == TreeEntry::SplitVectorize || E->hasCopyableElements() || + all_of(E->Scalars, + [=](Value *V) -> bool { + if (Opcode == Instruction::GetElementPtr && + !isa<GetElementPtrInst>(V)) + return true; + auto *I = dyn_cast<Instruction>(V); + return !I || !E->getMatchingMainOpOrAltOp(I) || + I->getParent() == BB || isVectorLikeInstWithConstOps(I); + })) && + "Expected gathered loads or GEPs or instructions from same basic " + "block."); auto FindLastInst = [&]() { Instruction *LastInst = Front; @@ -16035,18 +16495,20 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { auto *I = dyn_cast<Instruction>(V); if (!I) continue; + if (E->isCopyableElement(I)) + continue; if (LastInst->getParent() == I->getParent()) { if (LastInst->comesBefore(I)) LastInst = I; continue; } - assert(((E->getOpcode() == Instruction::GetElementPtr && + assert(((Opcode == Instruction::GetElementPtr && !isa<GetElementPtrInst>(I)) || E->State == TreeEntry::SplitVectorize || (isVectorLikeInstWithConstOps(LastInst) && isVectorLikeInstWithConstOps(I)) || (GatheredLoadsEntriesFirst.has_value() && - E->getOpcode() == Instruction::Load && E->isGather() && + Opcode == Instruction::Load && E->isGather() && E->Idx < *GatheredLoadsEntriesFirst)) && "Expected vector-like or non-GEP in GEP node insts only."); if (!DT->isReachableFromEntry(LastInst->getParent())) { @@ -16075,16 +16537,18 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { auto *I = dyn_cast<Instruction>(V); if (!I) continue; + if (E->isCopyableElement(I)) + continue; if (FirstInst->getParent() == I->getParent()) { if 
(I->comesBefore(FirstInst)) FirstInst = I; continue; } - assert(((E->getOpcode() == Instruction::GetElementPtr && - !isa<GetElementPtrInst>(I)) || - (isVectorLikeInstWithConstOps(FirstInst) && - isVectorLikeInstWithConstOps(I))) && - "Expected vector-like or non-GEP in GEP node insts only."); + assert(((Opcode == Instruction::GetElementPtr && + !isa<GetElementPtrInst>(I)) || + (isVectorLikeInstWithConstOps(FirstInst) && + isVectorLikeInstWithConstOps(I))) && + "Expected vector-like or non-GEP in GEP node insts only."); if (!DT->isReachableFromEntry(FirstInst->getParent())) { FirstInst = I; continue; @@ -16122,7 +16586,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { // Set insertpoint for gathered loads to the very first load. if (GatheredLoadsEntriesFirst.has_value() && E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() && - E->getOpcode() == Instruction::Load) { + Opcode == Instruction::Load) { Res = FindFirstInst(); EntryToLastInstruction.try_emplace(E, Res); return *Res; @@ -16139,7 +16603,8 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { return nullptr; for (Value *V : E->Scalars) { auto *I = dyn_cast<Instruction>(V); - if (!I || isa<PHINode>(I) || doesNotNeedToBeScheduled(I)) + if (!I || isa<PHINode>(I) || + (!E->isCopyableElement(I) && doesNotNeedToBeScheduled(I))) continue; ArrayRef<ScheduleBundle *> Bundles = It->second->getScheduleBundles(I); if (Bundles.empty()) @@ -16153,13 +16618,13 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { }; const ScheduleBundle *Bundle = FindScheduleBundle(E); if (!E->isGather() && !Bundle) { - if ((E->getOpcode() == Instruction::GetElementPtr && + if ((Opcode == Instruction::GetElementPtr && any_of(E->Scalars, [](Value *V) { return !isa<GetElementPtrInst>(V) && isa<Instruction>(V); })) || - all_of(E->Scalars, [](Value *V) { - return isa<PoisonValue>(V) || + all_of(E->Scalars, [&](Value *V) { + return isa<PoisonValue>(V) || E->isCopyableElement(V) || (!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V)); })) Res = FindLastInst(); @@ -18640,6 +19105,7 @@ Value *BoUpSLP::vectorizeTree( TE->UserTreeIndex.UserTE->State == TreeEntry::Vectorize && (TE->UserTreeIndex.UserTE->getOpcode() != Instruction::PHI || TE->UserTreeIndex.UserTE->isAltShuffle()) && + !TE->UserTreeIndex.UserTE->hasCopyableElements() && all_of(TE->UserTreeIndex.UserTE->Scalars, [](Value *V) { return isUsedOutsideBlock(V); })) { Instruction &LastInst = @@ -18903,7 +19369,7 @@ Value *BoUpSLP::vectorizeTree( continue; assert( (ExternallyUsedValues.count(Scalar) || - Scalar->hasNUsesOrMore(UsesLimit) || + ExternalUsesWithNonUsers.count(Scalar) || ExternalUsesAsOriginalScalar.contains(Scalar) || any_of( Scalar->users(), @@ -19182,7 +19648,7 @@ Value *BoUpSLP::vectorizeTree( if (auto *EE = dyn_cast<ExtractElementInst>(Scalar); EE && IgnoredExtracts.contains(EE)) continue; - if (isa<PoisonValue>(Scalar)) + if (!isa<Instruction>(Scalar) || Entry->isCopyableElement(Scalar)) continue; #ifndef NDEBUG Type *Ty = Scalar->getType(); @@ -19424,12 +19890,15 @@ void BoUpSLP::optimizeGatherSequence() { } BoUpSLP::ScheduleBundle & -BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) { +BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL, + const InstructionsState &S) { auto &BundlePtr = ScheduledBundlesList.emplace_back(std::make_unique<ScheduleBundle>()); for (Value *V : VL) { if (doesNotNeedToBeScheduled(V)) continue; + if (S.isCopyableElement(V)) + continue; ScheduleData *BundleMember = 
getScheduleData(V); assert(BundleMember && "no ScheduleData for bundle member " "(maybe not in same basic block)"); @@ -19450,10 +19919,19 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, const InstructionsState &S) { // No need to schedule PHIs, insertelement, extractelement and extractvalue // instructions. + bool HasCopyables = S.areInstructionsWithCopyableElements(); if (isa<PHINode>(S.getMainOp()) || - isVectorLikeInstWithConstOps(S.getMainOp()) || doesNotNeedToSchedule(VL)) + isVectorLikeInstWithConstOps(S.getMainOp()) || + (!HasCopyables && doesNotNeedToSchedule(VL)) || + all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); })) return nullptr; + // TODO Remove once full support for copyables is landed. + assert(all_of(VL, + [&](Value *V) { + return !S.isCopyableElement(V) || S.isNonSchedulable(V); + }) && + "Copyable elements should not be schedulable"); // Initialize the instruction bundle. Instruction *OldScheduleEnd = ScheduleEnd; LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getMainOp() << "\n"); @@ -19499,7 +19977,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, // Make sure that the scheduling region contains all // instructions of the bundle. for (Value *V : VL) { - if (doesNotNeedToBeScheduled(V)) + if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V)) continue; if (!extendSchedulingRegion(V, S)) { // If the scheduling region got new instructions at the lower end (or it @@ -19516,7 +19994,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, bool ReSchedule = false; for (Value *V : VL) { - if (doesNotNeedToBeScheduled(V)) + if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V)) continue; ScheduleData *BundleMember = getScheduleData(V); assert(BundleMember && @@ -19541,7 +20019,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, ReSchedule = true; } - ScheduleBundle &Bundle = buildBundle(VL); + ScheduleBundle &Bundle = buildBundle(VL, S); TryScheduleBundleImpl(ReSchedule, Bundle); if (!Bundle.isReady()) { for (ScheduleData *BD : Bundle.getBundle()) { @@ -19558,7 +20036,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, } ScheduledBundlesList.pop_back(); for (Value *V : VL) { - if (doesNotNeedToBeScheduled(V)) + if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V)) continue; ScheduledBundles.find(cast<Instruction>(V))->getSecond().pop_back(); } @@ -20187,7 +20665,7 @@ bool BoUpSLP::collectValuesToDemote( }; if (E.isGather() || !Visited.insert(&E).second || any_of(E.Scalars, [&](Value *V) { - return !isa<PoisonValue>(V) && all_of(V->users(), [&](User *U) { + return !isa<Constant>(V) && all_of(V->users(), [&](User *U) { return isa<InsertElementInst>(U) && !isVectorized(U); }); })) @@ -20555,9 +21033,10 @@ void BoUpSLP::computeMinimumValueSizes() { if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode, SelectInst>(U) || isa<SIToFPInst, UIToFPInst>(U) || - !isa<CastInst, BinaryOperator, FreezeInst, PHINode, - SelectInst>(UserTE->getMainOp()) || - isa<SIToFPInst, UIToFPInst>(UserTE->getMainOp())) + (UserTE->hasState() && + (!isa<CastInst, BinaryOperator, FreezeInst, PHINode, + SelectInst>(UserTE->getMainOp()) || + isa<SIToFPInst, UIToFPInst>(UserTE->getMainOp())))) return true; unsigned UserTESz = DL->getTypeSizeInBits( UserTE->Scalars.front()->getType()); @@ -20653,7 +21132,12 @@ void BoUpSLP::computeMinimumValueSizes() { if (!IsKnownPositive) ++BitWidth1; - APInt Mask = 
DB->getDemandedBits(cast<Instruction>(Root));
+ auto *I = dyn_cast<Instruction>(Root);
+ if (!I) {
+ MaxBitWidth = std::max(BitWidth1, MaxBitWidth);
+ continue;
+ }
+ APInt Mask = DB->getDemandedBits(I);
 unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
 MaxBitWidth =
 std::max<unsigned>(std::min(BitWidth1, BitWidth2), MaxBitWidth);
@@ -20802,6 +21286,7 @@ void BoUpSLP::computeMinimumValueSizes() {
 NodeIdx < VectorizableTree.size() &&
 VectorizableTree[NodeIdx]->UserTreeIndex &&
 VectorizableTree[NodeIdx]->UserTreeIndex.EdgeIdx == 0 &&
+ VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->hasState() &&
 VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->getOpcode() ==
 Instruction::Trunc &&
 !VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->isAltShuffle();
@@ -20982,7 +21467,9 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
 for (Value *V : Chain)
 ValOps.insert(cast<StoreInst>(V)->getValueOperand());
 // Operands are not same/alt opcodes or non-power-of-2 uniques - exit.
- InstructionsState S = getSameOpcode(ValOps.getArrayRef(), *TLI);
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ InstructionsState S = Analysis.buildInstructionsState(
+ ValOps.getArrayRef(), R, /*TryCopyableElementsVectorization=*/true);
 if (all_of(ValOps, IsaPred<Instruction>) && ValOps.size() > 1) {
 DenseSet<Value *> Stores(Chain.begin(), Chain.end());
 bool IsAllowedSize =
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 40a5565..25b9616 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -962,7 +962,11 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
 BackedgeTakenCount->setUnderlyingValue(TCMO);
 }
- VectorTripCount.setUnderlyingValue(VectorTripCountV);
+ if (!VectorTripCount.getUnderlyingValue())
+ VectorTripCount.setUnderlyingValue(VectorTripCountV);
+ else
+ assert(VectorTripCount.getUnderlyingValue() == VectorTripCountV &&
+ "VectorTripCount set earlier must match VectorTripCountV");
 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
 // FIXME: Model VF * UF computation completely in VPlan.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6655149..a5de593 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1012,6 +1012,10 @@ public:
 ReductionStartVector,
 // Creates a step vector starting from 0 to VF with a step of 1.
 StepVector,
+ /// Extracts a single lane, given by the first operand, from a set of vector
+ /// operands. The lane is an index into the vector formed by concatenating
+ /// all vector operands (all operands after the first one).
+ ExtractLane,
 };
@@ -1837,6 +1841,10 @@ public:
 getGEPNoWrapFlags(), getDebugLoc());
 }
+ /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
+ /// this is only accurate after the VPlan has been unrolled.
+ bool isFirstPart() const { return getUnrollPart(*this) == 0; }
+
 /// Return the cost of this VPHeaderPHIRecipe.
 InstructionCost computeCost(ElementCount VF,
 VPCostContext &Ctx) const override {
@@ -2304,14 +2312,15 @@ public:
 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
 /// be omitted (implied by passing an odd number of operands) in which case
 /// all other incoming values are merged into it.
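 /// (Illustrative example, not part of the original source: for a phi with two
 /// incoming values %a and %b, the operand list may be [%a, %m.a, %b, %m.b],
 /// or [%a, %b, %m.b] with %a's mask omitted, in which case %a is used for all
 /// lanes not selected by %m.b.)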
- VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands) - : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) { + VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL) + : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) { assert(Operands.size() > 0 && "Expected at least one operand!"); } VPBlendRecipe *clone() override { SmallVector<VPValue *> Ops(operands()); - return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops); + return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()), Ops, + getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPBlendSC) @@ -4056,6 +4065,10 @@ public: /// Returns VF * UF of the vector loop region. VPValue &getVFxUF() { return VFxUF; } + LLVMContext &getContext() const { + return getScalarHeader()->getIRBasicBlock()->getContext(); + } + void addVF(ElementCount VF) { VFs.insert(VF); } void setVF(ElementCount VF) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 3499e65..16072f2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -110,6 +110,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { case VPInstruction::BuildStructVector: case VPInstruction::BuildVector: return SetResultTyFromOp(); + case VPInstruction::ExtractLane: + return inferScalarType(R->getOperand(1)); case VPInstruction::FirstActiveLane: return Type::getIntNTy(Ctx, 64); case VPInstruction::ExtractLastElement: diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 194874a..6c1f53b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -437,9 +437,12 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, // We are about to replace the branch to exit the region. Remove the original // BranchOnCond, if there is any. + DebugLoc LatchDL = DL; if (!LatchVPBB->empty() && - match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) + match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) { + LatchDL = LatchVPBB->getTerminator()->getDebugLoc(); LatchVPBB->getTerminator()->eraseFromParent(); + } VPBuilder Builder(LatchVPBB); // Add a VPInstruction to increment the scalar canonical IV by VF * UF. @@ -452,7 +455,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, // Add the BranchOnCount VPInstruction to the latch. Builder.createNaryOp(VPInstruction::BranchOnCount, - {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); + {CanonicalIVIncrement, &Plan.getVectorTripCount()}, + LatchDL); } void VPlanTransforms::prepareForVectorization( @@ -462,28 +466,27 @@ void VPlanTransforms::prepareForVectorization( VPDominatorTree VPDT; VPDT.recalculate(Plan); - VPBlockBase *HeaderVPB = Plan.getEntry()->getSingleSuccessor(); - canonicalHeaderAndLatch(HeaderVPB, VPDT); - VPBlockBase *LatchVPB = HeaderVPB->getPredecessors()[1]; + auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor()); + canonicalHeaderAndLatch(HeaderVPBB, VPDT); + auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]); VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph"); VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry()); VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block"); - // The canonical LatchVPB has the header block as last successor. 
If it has + // The canonical LatchVPBB has the header block as last successor. If it has // another successor, this successor is an exit block - insert middle block on // its edge. Otherwise, add middle block as another successor retaining header // as last. - if (LatchVPB->getNumSuccessors() == 2) { - VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0]; - VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB); + if (LatchVPBB->getNumSuccessors() == 2) { + VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0]; + VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB); } else { - VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB); - LatchVPB->swapSuccessors(); + VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB); + LatchVPBB->swapSuccessors(); } - addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB), - cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL); + addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL); [[maybe_unused]] bool HandledUncountableEarlyExit = false; // Disconnect all early exits from the loop leaving it with a single exit from @@ -499,8 +502,7 @@ void VPlanTransforms::prepareForVectorization( assert(!HandledUncountableEarlyExit && "can handle exactly one uncountable early exit"); handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan, - cast<VPBasicBlock>(HeaderVPB), - cast<VPBasicBlock>(LatchVPB), Range); + HeaderVPBB, LatchVPBB, Range); HandledUncountableEarlyExit = true; } else { for (VPRecipeBase &R : EB->phis()) @@ -564,15 +566,15 @@ void VPlanTransforms::prepareForVectorization( // the corresponding compare because they may have ended up with different // line numbers and we want to avoid awkward line stepping while debugging. // E.g., if the compare has got a line number inside the loop. 
- DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc(); + DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc(); VPBuilder Builder(MiddleVPBB); VPValue *Cmp; if (!RequiresScalarEpilogueCheck) - Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse( - IntegerType::getInt1Ty(TripCount->getType()->getContext()))); + Cmp = Plan.getOrAddLiveIn( + ConstantInt::getFalse(IntegerType::getInt1Ty(Plan.getContext()))); else if (TailFolded) - Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue( - IntegerType::getInt1Ty(TripCount->getType()->getContext()))); + Cmp = Plan.getOrAddLiveIn( + ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext()))); else Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(), &Plan.getVectorTripCount(), LatchDL, "cmp.n"); @@ -646,7 +648,7 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, .createNaryOp(VPInstruction::BranchOnCond, {CondVPV}, Plan.getCanonicalIV()->getDebugLoc()); if (AddBranchWeights) { - MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext()); + MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false); Term->addMetadata(LLVMContext::MD_prof, BranchWeights); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index fc8458c..3b3bbc3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -251,8 +251,9 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { } OperandsWithMask.push_back(EdgeMask); } - PHINode *IRPhi = cast<PHINode>(PhiR->getUnderlyingValue()); - auto *Blend = new VPBlendRecipe(IRPhi, OperandsWithMask); + PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue()); + auto *Blend = + new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc()); Builder.insert(Blend); PhiR->replaceAllUsesWith(Blend); PhiR->eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 0d6152b..225658b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -427,6 +427,7 @@ unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(const VPUser &U) const { } namespace llvm { +template class VPUnrollPartAccessor<1>; template class VPUnrollPartAccessor<2>; template class VPUnrollPartAccessor<3>; } @@ -863,6 +864,31 @@ Value *VPInstruction::generate(VPTransformState &State) { Res = Builder.CreateOr(Res, State.get(Op)); return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res); } + case VPInstruction::ExtractLane: { + Value *LaneToExtract = State.get(getOperand(0), true); + Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0)); + Value *Res = nullptr; + Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF); + + for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) { + Value *VectorStart = + Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1)); + Value *VectorIdx = Idx == 1 + ? LaneToExtract + : Builder.CreateSub(LaneToExtract, VectorStart); + Value *Ext = State.VF.isScalar() + ? 
State.get(getOperand(Idx)) + : Builder.CreateExtractElement( + State.get(getOperand(Idx)), VectorIdx); + if (Res) { + Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart); + Res = Builder.CreateSelect(Cmp, Ext, Res); + } else { + Res = Ext; + } + } + return Res; + } case VPInstruction::FirstActiveLane: { if (getNumOperands() == 1) { Value *Mask = State.get(getOperand(0)); @@ -921,7 +947,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, } switch (getOpcode()) { - case Instruction::ExtractElement: { + case Instruction::ExtractElement: + case VPInstruction::ExtractLane: { // Add on the cost of extracting the element. auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, @@ -983,6 +1010,7 @@ bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractLastElement || getOpcode() == VPInstruction::ExtractPenultimateElement || getOpcode() == Instruction::ExtractElement || + getOpcode() == VPInstruction::ExtractLane || getOpcode() == VPInstruction::FirstActiveLane || getOpcode() == VPInstruction::ComputeAnyOfResult || getOpcode() == VPInstruction::ComputeFindIVResult || @@ -1048,6 +1076,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::BuildVector: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: + case VPInstruction::ExtractLane: case VPInstruction::ExtractLastElement: case VPInstruction::ExtractPenultimateElement: case VPInstruction::FirstActiveLane: @@ -1097,6 +1126,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::ComputeAnyOfResult: case VPInstruction::ComputeFindIVResult: return Op == getOperand(1); + case VPInstruction::ExtractLane: + return Op == getOperand(0); }; llvm_unreachable("switch should return"); } @@ -1176,6 +1207,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::BuildVector: O << "buildvector"; break; + case VPInstruction::ExtractLane: + O << "extract-lane"; + break; case VPInstruction::ExtractLastElement: O << "extract-last-element"; break; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3372bcc..935a4e4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -774,10 +774,10 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, using namespace VPlanPatternMatch; VPValue *Incoming, *Mask; - if (!match(Op, m_VPInstruction<Instruction::ExtractElement>( - m_VPValue(Incoming), + if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>( m_VPInstruction<VPInstruction::FirstActiveLane>( - m_VPValue(Mask))))) + m_VPValue(Mask)), + m_VPValue(Incoming)))) return nullptr; auto *WideIV = getOptimizableIVOf(Incoming); @@ -1172,6 +1172,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { if (!Plan->isUnrolled()) return; + // VPVectorPointer for part 0 can be replaced by their start pointer. + if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) { + if (VecPtr->isFirstPart()) { + VecPtr->replaceAllUsesWith(VecPtr->getOperand(0)); + return; + } + } + // VPScalarIVSteps for part 0 can be replaced by their start value, if only // the first lane is demanded. 
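 // (Illustrative note, not part of the original source: for part 0 the step
 // offset is 0 * Step, so the steps recipe reduces to its start value when
 // only the first lane is used; the same reasoning lets a part-0
 // VPVectorPointer above collapse to its start pointer.)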
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) { @@ -1307,8 +1315,9 @@ static void simplifyBlends(VPlan &Plan) { OperandsWithMask.push_back(Blend->getMask(I)); } - auto *NewBlend = new VPBlendRecipe( - cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask); + auto *NewBlend = + new VPBlendRecipe(cast_or_null<PHINode>(Blend->getUnderlyingValue()), + OperandsWithMask, Blend->getDebugLoc()); NewBlend->insertBefore(&R); VPValue *DeadMask = Blend->getMask(StartIndex); @@ -1361,7 +1370,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, unsigned NewBitWidth = ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF); - LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext(); + LLVMContext &Ctx = Plan.getContext(); auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth); bool MadeChange = false; @@ -1883,9 +1892,7 @@ void VPlanTransforms::truncateToMinimalBitwidths( } } -/// Remove BranchOnCond recipes with true or false conditions together with -/// removing dead edges to their successors. -static void removeBranchOnConst(VPlan &Plan) { +void VPlanTransforms::removeBranchOnConst(VPlan &Plan) { using namespace llvm::VPlanPatternMatch; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( vp_depth_first_shallow(Plan.getEntry()))) { @@ -1908,12 +1915,9 @@ static void removeBranchOnConst(VPlan &Plan) { "There must be a single edge between VPBB and its successor"); // Values coming from VPBB into phi recipes of RemoveSucc are removed from // these recipes. - for (VPRecipeBase &R : RemovedSucc->phis()) { - auto *Phi = cast<VPPhiAccessors>(&R); - assert((!isa<VPIRPhi>(&R) || RemovedSucc->getNumPredecessors() == 1) && - "VPIRPhis must have a single predecessor"); - Phi->removeIncomingValueFor(VPBB); - } + for (VPRecipeBase &R : RemovedSucc->phis()) + cast<VPPhiAccessors>(&R)->removeIncomingValueFor(VPBB); + // Disconnect blocks and remove the terminator. RemovedSucc will be deleted // automatically on VPlan destruction if it becomes unreachable. VPBlockUtils::disconnectBlocks(VPBB, RemovedSucc); @@ -2515,8 +2519,8 @@ void VPlanTransforms::createInterleaveGroups( DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) * IG->getIndex(IRInsertPos), /*IsSigned=*/true); - VPValue *OffsetVPV = Plan.getOrAddLiveIn( - ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset)); + VPValue *OffsetVPV = + Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset)); VPBuilder B(InsertPos); Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV) : B.createPtrAdd(InsertPos->getAddr(), OffsetVPV); @@ -2842,7 +2846,7 @@ void VPlanTransforms::handleUncountableEarlyExit( VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr, "first.active.lane"); IncomingFromEarlyExit = EarlyExitB.createNaryOp( - Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane}, + VPInstruction::ExtractLane, {FirstActiveLane, IncomingFromEarlyExit}, nullptr, "early.exit.value"); ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit); } @@ -3093,6 +3097,29 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) { } } +void VPlanTransforms::materializeVectorTripCount( + VPlan &Plan, ElementCount BestVF, unsigned BestUF, + PredicatedScalarEvolution &PSE) { + assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan"); + assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan"); + + VPValue *TC = Plan.getTripCount(); + // Skip cases for which the trip count may be non-trivial to materialize. 
+ if (!Plan.hasScalarTail() ||
+ Plan.getMiddleBlock()->getSingleSuccessor() ==
+ Plan.getScalarPreheader() ||
+ !TC->isLiveIn())
+ return;
+ // Materialize vector trip counts for constants early if it can simply
+ // be computed as (Original TC / (VF * UF)) * (VF * UF).
+ ScalarEvolution &SE = *PSE.getSE();
+ auto *TCScev = SE.getSCEV(TC->getLiveInIRValue());
+ const SCEV *VFxUF = SE.getElementCount(TCScev->getType(), BestVF * BestUF);
+ auto VecTCScev = SE.getMulExpr(SE.getUDivExpr(TCScev, VFxUF), VFxUF);
+ if (auto *NewC = dyn_cast<SCEVConstant>(VecTCScev))
+ Plan.getVectorTripCount().setUnderlyingValue(NewC->getValue());
+}
+
 /// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
 /// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
 /// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding
@@ -3350,7 +3377,7 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
 if (VF.isScalable() && VScaleForTuning.has_value())
 VectorStep *= *VScaleForTuning;
 assert(VectorStep > 0 && "trip count should not be zero");
- MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
+ MDBuilder MDB(Plan.getContext());
 MDNode *BranchWeights =
 MDB.createBranchWeights({1, VectorStep - 1}, /*IsExpected=*/false);
 MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ab189f6..d5af6cd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -224,6 +224,10 @@ struct VPlanTransforms {
 /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
 static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
+ /// Remove BranchOnCond recipes with true or false conditions together with
+ /// removing dead edges to their successors.
+ static void removeBranchOnConst(VPlan &Plan);
+
 /// If there's a single exit block, optimize its phi recipes that use exiting
 /// IV values by feeding them precomputed end values instead, possibly taken
 /// one step backwards.
@@ -234,6 +238,12 @@ struct VPlanTransforms {
 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
 static void materializeBroadcasts(VPlan &Plan);
+ // Materialize vector trip counts for constants early if it can simply be
+ // computed as (Original TC / (VF * UF)) * (VF * UF).
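+ // (Illustrative example, not part of the original patch: with an original
+ // trip count of 1003, VF = 4 and UF = 2, VF * UF = 8 and the materialized
+ // vector trip count is (1003 udiv 8) * 8 = 1000.)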
+ static void materializeVectorTripCount(VPlan &Plan, ElementCount BestVF, + unsigned BestUF, + PredicatedScalarEvolution &PSE); + /// Try to convert a plan with interleave groups with VF elements to a plan /// with the interleave groups replaced by wide loads and stores processing VF /// elements, if all transformed interleave groups access the full vector diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index b89cd21..871e37e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -363,6 +363,13 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) { continue; } VPValue *Op0; + if (match(&R, m_VPInstruction<VPInstruction::ExtractLane>( + m_VPValue(Op0), m_VPValue(Op1)))) { + addUniformForAllParts(cast<VPInstruction>(&R)); + for (unsigned Part = 1; Part != UF; ++Part) + R.addOperand(getValueForPart(Op1, Part)); + continue; + } if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>( m_VPValue(Op0))) || match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>( |