Diffstat (limited to 'llvm/lib')
47 files changed, 391 insertions, 262 deletions
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 256befa..835e270 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -1074,7 +1074,7 @@ bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, /// Compare to see if S is less than Size, using /// -/// isKnownNegative(S - max(Size, 1)) +/// isKnownNegative(S - Size) /// /// with some extra checking if S is an AddRec and we can prove less-than using /// the loop bounds. @@ -1090,21 +1090,34 @@ bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { Size = SE->getTruncateOrZeroExtend(Size, MaxType); // Special check for addrecs using BE taken count - const SCEV *Bound = SE->getMinusSCEV(S, Size); - if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { - if (AddRec->isAffine()) { + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) + if (AddRec->isAffine() && AddRec->hasNoSignedWrap()) { const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); - if (!isa<SCEVCouldNotCompute>(BECount)) { - const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); - if (SE->isKnownNegative(Limit)) - return true; - } + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*SE); + const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE); + const SCEV *Diff0 = SE->getMinusSCEV(Start, Size); + const SCEV *Diff1 = SE->getMinusSCEV(End, Size); + + // If the value of Step is non-negative and the AddRec is non-wrap, it + // reaches its maximum at the last iteration. So it's enough to check + // whether End - Size is negative. + if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1)) + return true; + + // If the value of Step is non-positive and the AddRec is non-wrap, the + // initial value is its maximum. + if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0)) + return true; + + // Even if we don't know the sign of Step, either Start or End must be + // the maximum value of the AddRec since it is non-wrap. + if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1)) + return true; } - } // Check using normal isKnownNegative - const SCEV *LimitedBound = - SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); + const SCEV *LimitedBound = SE->getMinusSCEV(S, Size); return SE->isKnownNegative(LimitedBound); } diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 2b0f212..67c2cfa 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -150,6 +150,10 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + Loc = MemoryLocation::getForArgument(II, 0, TLI); + // These intrinsics don't really modify the memory, but returning Mod + // will allow them to be handled conservatively. + return ModRefInfo::Mod; case Intrinsic::invariant_start: Loc = MemoryLocation::getForArgument(II, 1, TLI); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. @@ -441,11 +445,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( Intrinsic::ID ID = II->getIntrinsicID(); switch (ID) { case Intrinsic::lifetime_start: { - // FIXME: This only considers queries directly on the invariant-tagged - // pointer, not on query pointers that are indexed off of them.
It'd - // be nice to handle that at some point (the right approach is to use - // GetPointerBaseWithConstantOffset). - MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1)); + MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(0)); if (BatchAA.isMustAlias(ArgLoc, MemLoc)) return MemDepResult::getDef(II); continue; diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 28a2640..72b643c 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -191,7 +191,7 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { - assert(ArgIdx == 1 && "Invalid argument index"); + assert(ArgIdx == 0 && "Invalid argument index"); auto *AI = dyn_cast<AllocaInst>(Arg); if (!AI) // lifetime of poison value. diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp index abe4985..1e20fca 100644 --- a/llvm/lib/Analysis/StackLifetime.cpp +++ b/llvm/lib/Analysis/StackLifetime.cpp @@ -70,7 +70,7 @@ void StackLifetime::collectMarkers() { const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); if (!II || !II->isLifetimeStartOrEnd()) continue; - const AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(1)); + const AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(0)); if (!AI) continue; auto It = AllocaNumbering.find(AI); diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 3f3d5dc9..278dd65 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1915,7 +1915,6 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( // TODO: the "order" argument type is "int", not int32. So // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints. - ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size); assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO"); Constant *OrderingVal = ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering)); @@ -2012,7 +2011,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); AllocaCASExpected->setAlignment(AllocaAlignment); - Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64); + Builder.CreateLifetimeStart(AllocaCASExpected); Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); Args.push_back(AllocaCASExpected); } @@ -2026,7 +2025,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( } else { AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); AllocaValue->setAlignment(AllocaAlignment); - Builder.CreateLifetimeStart(AllocaValue, SizeVal64); + Builder.CreateLifetimeStart(AllocaValue); Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment); Args.push_back(AllocaValue); } @@ -2036,7 +2035,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); AllocaResult->setAlignment(AllocaAlignment); - Builder.CreateLifetimeStart(AllocaResult, SizeVal64); + Builder.CreateLifetimeStart(AllocaResult); Args.push_back(AllocaResult); } @@ -2069,7 +2068,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( // And then, extract the results... 
if (ValueOperand && !UseSizedLibcall) - Builder.CreateLifetimeEnd(AllocaValue, SizeVal64); + Builder.CreateLifetimeEnd(AllocaValue); if (CASExpected) { // The final result from the CAS is {load of 'expected' alloca, bool result @@ -2078,7 +2077,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( Value *V = PoisonValue::get(FinalResultTy); Value *ExpectedOut = Builder.CreateAlignedLoad( CASExpected->getType(), AllocaCASExpected, AllocaAlignment); - Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64); + Builder.CreateLifetimeEnd(AllocaCASExpected); V = Builder.CreateInsertValue(V, ExpectedOut, 0); V = Builder.CreateInsertValue(V, Result, 1); I->replaceAllUsesWith(V); @@ -2089,7 +2088,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); - Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); + Builder.CreateLifetimeEnd(AllocaResult); } I->replaceAllUsesWith(V); } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index bbfae57..d30dfa7 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2209,7 +2209,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; - const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(1)); + const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0)); if (!AI || !AI->isStaticAlloca()) return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7341914..5f1e38a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12843,22 +12843,21 @@ SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) { SDLoc DL(HG); EVT MemVT = HG->getMemoryVT(); + EVT DataVT = Index.getValueType(); MachineMemOperand *MMO = HG->getMemOperand(); ISD::MemIndexType IndexType = HG->getIndexType(); if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return Chain; - SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index, - HG->getScale(), HG->getIntID()}; - if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL)) + if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) || + refineIndexType(Index, IndexType, DataVT, DAG)) { + SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index, + HG->getScale(), HG->getIntID()}; return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops, MMO, IndexType); + } - EVT DataVT = Index.getValueType(); - if (refineIndexType(Index, IndexType, DataVT, DAG)) - return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops, - MMO, IndexType); return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b9e72c9..5ef1746 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1371,7 +1371,7 @@ void SelectionDAG::init(MachineFunction &NewMF, const TargetLibraryInfo *LibraryInfo, UniformityInfo *NewUA, ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin, MachineModuleInfo &MMIin, - FunctionVarLocs const *VarLocs, bool HasDivergency) { + FunctionVarLocs const *VarLocs) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1384,7 +1384,6 @@ void SelectionDAG::init(MachineFunction &NewMF, BFI = BFIin; MMI = &MMIin; FnVarLocs = VarLocs; - 
DivergentTarget = HasDivergency; } SelectionDAG::~SelectionDAG() { @@ -2331,8 +2330,7 @@ SDValue SelectionDAG::getRegister(Register Reg, EVT VT) { return SDValue(E, 0); auto *N = newSDNode<RegisterSDNode>(Reg, VTs); - N->SDNodeBits.IsDivergent = - DivergentTarget && TLI->isSDNodeSourceOfDivergence(N, FLI, UA); + N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -12264,8 +12262,6 @@ static bool gluePropagatesDivergence(const SDNode *Node) { } bool SelectionDAG::calculateDivergence(SDNode *N) { - if (!DivergentTarget) - return false; if (TLI->isSDNodeAlwaysUniform(N)) { assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) && "Conflicting divergence information!"); @@ -12285,8 +12281,6 @@ bool SelectionDAG::calculateDivergence(SDNode *N) { } void SelectionDAG::updateDivergence(SDNode *N) { - if (!DivergentTarget) - return; SmallVector<SDNode *, 16> Worklist(1, N); do { N = Worklist.pop_back_val(); @@ -13847,20 +13841,16 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { Ops[I].setInitial(Vals[I]); EVT VT = Ops[I].getValueType(); - // Take care of the Node's operands iff target has divergence // Skip Chain. It does not carry divergence. - if (DivergentTarget && VT != MVT::Other && + if (VT != MVT::Other && (VT != MVT::Glue || gluePropagatesDivergence(Ops[I].getNode())) && Ops[I].getNode()->isDivergent()) { - // Node is going to be divergent if at least one of its operand is - // divergent, unless it belongs to the "AlwaysUniform" exemptions. IsDivergent = true; } } Node->NumOperands = Vals.size(); Node->OperandList = Ops; - // Check the divergence of the Node itself. - if (DivergentTarget && !TLI->isSDNodeAlwaysUniform(Node)) { + if (!TLI->isSDNodeAlwaysUniform(Node)) { IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA); Node->SDNodeBits.IsDivergent = IsDivergent; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f5f5c14..0d1e954 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7597,7 +7597,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (TM.getOptLevel() == CodeGenOptLevel::None) return; - const AllocaInst *LifetimeObject = dyn_cast<AllocaInst>(I.getArgOperand(1)); + const AllocaInst *LifetimeObject = dyn_cast<AllocaInst>(I.getArgOperand(0)); if (!LifetimeObject) return; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 26071ed..ece50ed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -480,10 +480,7 @@ void SelectionDAGISel::initializeAnalysisResults( MachineModuleInfo &MMI = MAMP.getCachedResult<MachineModuleAnalysis>(*Fn.getParent())->getMMI(); - TTI = &FAM.getResult<TargetIRAnalysis>(Fn); - - CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs, - TTI->hasBranchDivergence(&Fn)); + CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs); // Now get the optional analyzes if we want to. 
// This is based on the possibly changed OptLevel (after optnone is taken @@ -501,6 +498,10 @@ void SelectionDAGISel::initializeAnalysisResults( BatchAA = std::nullopt; SP = &FAM.getResult<SSPLayoutAnalysis>(Fn); + +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS + TTI = &FAM.getResult<TargetIRAnalysis>(Fn); +#endif } void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { @@ -536,10 +537,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { MachineModuleInfo &MMI = MFP.getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); - TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); - - CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs, - TTI->hasBranchDivergence(&Fn)); + CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs); // Now get the optional analyzes if we want to. // This is based on the possibly changed OptLevel (after optnone is taken @@ -558,6 +556,10 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { BatchAA = std::nullopt; SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo(); + +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS + TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); +#endif } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 7159107..35f00ae 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1311,14 +1311,15 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, } break; case 'l': - if (Name.starts_with("lifetime.start") || - Name.starts_with("lifetime.end")) { - // Unless remangling is required, do not upgrade the function declaration, - // but do upgrade the calls. - if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F)) - NewFn = *Result; - else - NewFn = F; + if ((Name.starts_with("lifetime.start") || + Name.starts_with("lifetime.end")) && + F->arg_size() == 2) { + rename(F); + NewFn = Intrinsic::getOrInsertDeclaration( + F->getParent(), + Name.starts_with("lifetime.start") ? Intrinsic::lifetime_start + : Intrinsic::lifetime_end, + F->getArg(0)->getType()); return true; } break; @@ -5133,21 +5134,20 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { - Value *Size = CI->getArgOperand(0); - Value *Ptr = CI->getArgOperand(1); - if (isa<AllocaInst>(Ptr)) { + if (CI->arg_size() != 2) { DefaultCase(); return; } + Value *Ptr = CI->getArgOperand(1); // Try to strip pointer casts, such that the lifetime works on an alloca. Ptr = Ptr->stripPointerCasts(); if (isa<AllocaInst>(Ptr)) { // Don't use NewFn, as we might have looked through an addrspacecast. 
if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start) - NewCall = Builder.CreateLifetimeStart(Ptr, cast<ConstantInt>(Size)); + NewCall = Builder.CreateLifetimeStart(Ptr); else - NewCall = Builder.CreateLifetimeEnd(Ptr, cast<ConstantInt>(Size)); + NewCall = Builder.CreateLifetimeEnd(Ptr); break; } diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 49c6dc7..614c3a9 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -411,28 +411,16 @@ CallInst *IRBuilderBase::CreateFPMinimumReduce(Value *Src) { return getReductionIntrinsic(Intrinsic::vector_reduce_fminimum, Src); } -CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) { +CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr) { assert(isa<PointerType>(Ptr->getType()) && "lifetime.start only applies to pointers."); - if (!Size) - Size = getInt64(-1); - else - assert(Size->getType() == getInt64Ty() && - "lifetime.start requires the size to be an i64"); - Value *Ops[] = { Size, Ptr }; - return CreateIntrinsic(Intrinsic::lifetime_start, {Ptr->getType()}, Ops); + return CreateIntrinsic(Intrinsic::lifetime_start, {Ptr->getType()}, {Ptr}); } -CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) { +CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr) { assert(isa<PointerType>(Ptr->getType()) && "lifetime.end only applies to pointers."); - if (!Size) - Size = getInt64(-1); - else - assert(Size->getType() == getInt64Ty() && - "lifetime.end requires the size to be an i64"); - Value *Ops[] = { Size, Ptr }; - return CreateIntrinsic(Intrinsic::lifetime_end, {Ptr->getType()}, Ops); + return CreateIntrinsic(Intrinsic::lifetime_end, {Ptr->getType()}, {Ptr}); } CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) { diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ca3f148..f5dcb5e 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6770,7 +6770,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { - Value *Ptr = Call.getArgOperand(1); + Value *Ptr = Call.getArgOperand(0); Check(isa<AllocaInst>(Ptr) || isa<PoisonValue>(Ptr), "llvm.lifetime.start/end can only be used on alloca or poison", &Call); diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 8c27958..d0c6144 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -443,7 +443,7 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst, // MCAssembler::relaxAlign. auto *Sec = F->getParent(); if (!Sec->isLinkerRelaxable()) - Sec->setLinkerRelaxable(); + Sec->setFirstLinkerRelaxable(F->getLayoutOrder()); // Do not add data after a linker-relaxable instruction. The difference // between a new label and a label at or before the linker-relaxable // instruction cannot be resolved at assemble-time. 
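The lifetime-intrinsic updates above (MemoryLocation, MemoryDependenceAnalysis, AutoUpgrade, IRBuilder, Verifier) all follow from dropping the explicit i64 size operand from llvm.lifetime.start/end: the markers now take only the alloca pointer, and old two-argument declarations and calls are auto-upgraded. A minimal sketch of emitting the markers through the updated builder API, assuming only the single-argument CreateLifetimeStart/CreateLifetimeEnd entry points shown in the IRBuilder.cpp hunk (the function and buffer below are illustrative, not part of the patch):

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Emits, roughly:
  //   %buf = alloca [16 x i8]
  //   call void @llvm.lifetime.start.p0(ptr %buf)   ; no i64 size operand
  //   ...
  //   call void @llvm.lifetime.end.p0(ptr %buf)
  static void emitScopedBuffer(Function &F) {
    BasicBlock &Entry = F.getEntryBlock();
    IRBuilder<> B(&Entry, Entry.begin());
    AllocaInst *Buf =
        B.CreateAlloca(ArrayType::get(B.getInt8Ty(), 16), nullptr, "buf");
    B.CreateLifetimeStart(Buf); // previously also took a ConstantInt size
    // ... code that uses %buf goes here ...
    B.CreateLifetimeEnd(Buf);
  }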
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index 27ca131..9ed6fd1 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -20,7 +20,7 @@ using namespace llvm; MCSection::MCSection(StringRef Name, bool IsText, bool IsBss, MCSymbol *Begin) : Begin(Begin), HasInstructions(false), IsRegistered(false), IsText(IsText), - IsBss(IsBss), LinkerRelaxable(false), Name(Name) { + IsBss(IsBss), Name(Name) { DummyFragment.setParent(this); } diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 201bfe0..d6a3d59 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1236,14 +1236,20 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, .add(MI.getOperand(3)); transferImpOps(MI, I, I); } else { + unsigned RegState = + getRenamableRegState(MI.getOperand(1).isRenamable()) | + getKillRegState( + MI.getOperand(1).isKill() && + MI.getOperand(1).getReg() != MI.getOperand(2).getReg() && + MI.getOperand(1).getReg() != MI.getOperand(3).getReg()); BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8 : AArch64::ORRv16i8)) .addReg(DstReg, RegState::Define | getRenamableRegState(MI.getOperand(0).isRenamable())) - .add(MI.getOperand(1)) - .add(MI.getOperand(1)); + .addReg(MI.getOperand(1).getReg(), RegState) + .addReg(MI.getOperand(1).getReg(), RegState); auto I2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8 diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index d068a12..b033f88 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -7362,7 +7362,9 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm, [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc, V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", []>; + asm#"2", ".8h", ".16b", ".16b", + [(set (v8i16 V128:$Rd), (OpNode (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))), + (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm)))))]>; let Predicates = [HasAES] in { def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc, V128, V64, V64, @@ -7374,10 +7376,6 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm, [(set (v16i8 V128:$Rd), (OpNode (extract_high_v2i64 (v2i64 V128:$Rn)), (extract_high_v2i64 (v2i64 V128:$Rm))))]>; } - - def : Pat<(v8i16 (OpNode (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))), - (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm))))), - (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>; } multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm, @@ -7402,6 +7400,7 @@ multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm, (extract_high_v4i32 (v4i32 V128:$Rm))))]>; } +let isCommutable = 1 in multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm, SDPatternOperator OpNode = null_frag> { def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, @@ -7483,6 +7482,7 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc, (extract_high_v4i32 (v4i32 V128:$Rm)))))))]>; } +let isCommutable = 1 in multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm, SDPatternOperator OpNode = null_frag> { def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index ac31236..8cfbff9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6055,6 +6055,7 @@ defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>; defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; +let isCommutable = 1 in defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", TriOpFrag<(add node:$LHS, (abds node:$MHS, node:$RHS))> >; @@ -6806,6 +6807,7 @@ defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn> defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; +let isCommutable = 1 in defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>; defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", abds>; defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", abds>; @@ -6822,6 +6824,7 @@ defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", saddsat>; defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", ssubsat>; defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", int_aarch64_neon_sqdmull>; +let isCommutable = 0 in defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", @@ -6836,6 +6839,7 @@ defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>; +let isCommutable = 0 in defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp index f136a184..a67bd42 100644 --- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp +++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp @@ -585,8 +585,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { ClMaxLifetimes); if (StandardLifetime) { IntrinsicInst *Start = Info.LifetimeStart[0]; - uint64_t Size = - cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue(); + uint64_t Size = *Info.AI->getAllocationSize(*DL); Size = alignTo(Size, kTagGranuleSize); tagAlloca(AI, Start->getNextNode(), TagPCall, Size); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index f580f43..c21a9a1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -109,12 +109,17 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const { // Find AV_* registers assigned to AGPRs. 
const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg); - if (!TRI.isVectorSuperClass(VirtRegRC)) + if (!TRI.hasAGPRs(VirtRegRC)) continue; - const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg); - if (!TRI.isAGPRClass(AssignedRC)) - continue; + const TargetRegisterClass *AssignedRC = VirtRegRC; + if (TRI.hasVGPRs(VirtRegRC)) { + // If this is an AV register, we have to check if the actual assignment is + // to an AGPR + AssignedRC = TRI.getPhysRegBaseClass(PhysReg); + if (!TRI.isAGPRClass(AssignedRC)) + continue; + } LiveInterval &LI = LIS.getInterval(VReg); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index ea99cc4..75d3cfa 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -802,6 +802,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::BSWAP, VT, Expand); } + if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps()) + setOperationAction(ISD::SCMP, MVT::i32, Custom); + + if (!Subtarget->hasV8_1MMainlineOps()) + setOperationAction(ISD::UCMP, MVT::i32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); @@ -1634,6 +1640,10 @@ bool ARMTargetLowering::useSoftFloat() const { return Subtarget->useSoftFloat(); } +bool ARMTargetLowering::shouldExpandCmpUsingSelects(EVT VT) const { + return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32; +} + // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, @@ -10612,6 +10622,133 @@ SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op, return DAG.getBitcast(MVT::i32, Res); } +SDValue ARMTargetLowering::LowerCMP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + // Determine if this is signed or unsigned comparison + bool IsSigned = (Op.getOpcode() == ISD::SCMP); + + // Special case for Thumb1 UCMP only + if (!IsSigned && Subtarget->isThumb1Only()) { + // For Thumb unsigned comparison, use this sequence: + // subs r2, r0, r1 ; r2 = LHS - RHS, sets flags + // sbc r2, r2 ; r2 = r2 - r2 - !carry + // cmp r1, r0 ; compare RHS with LHS + // sbc r1, r1 ; r1 = r1 - r1 - !carry + // subs r0, r2, r1 ; r0 = r2 - r1 (final result) + + // First subtraction: LHS - RHS + SDValue Sub1WithFlags = DAG.getNode( + ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + SDValue Sub1Result = Sub1WithFlags.getValue(0); + SDValue Flags1 = Sub1WithFlags.getValue(1); + + // SUBE: Sub1Result - Sub1Result - !carry + // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned) + SDValue Sbc1 = + DAG.getNode(ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), + Sub1Result, Sub1Result, Flags1); + SDValue Sbc1Result = Sbc1.getValue(0); + + // Second comparison: RHS vs LHS (reverse comparison) + SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS); + + // SUBE: RHS - RHS - !carry + // This gives 0 if RHS <= LHS (unsigned), -1 if RHS > LHS (unsigned) + SDValue Sbc2 = DAG.getNode( + ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, CmpFlags); + SDValue Sbc2Result = Sbc2.getValue(0); + + // Final subtraction: Sbc1Result - Sbc2Result (no flags needed) + SDValue Result = + DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result); + if 
(Op.getValueType() != MVT::i32) + Result = DAG.getSExtOrTrunc(Result, dl, Op.getValueType()); + + return Result; + } + + // For the ARM assembly pattern: + // subs r0, r0, r1 ; subtract RHS from LHS and set flags + // movgt r0, #1 ; if LHS > RHS, set result to 1 (GT for signed, HI for + // unsigned) mvnlt r0, #0 ; if LHS < RHS, set result to -1 (LT for + // signed, LO for unsigned) + // ; if LHS == RHS, result remains 0 from the subs + + // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC + unsigned Opcode = ARMISD::SUBC; + + // Check if RHS is a subtraction against 0: (0 - X) + if (RHS.getOpcode() == ISD::SUB) { + SDValue SubLHS = RHS.getOperand(0); + SDValue SubRHS = RHS.getOperand(1); + + // Check if it's 0 - X + if (isNullConstant(SubLHS)) { + bool CanUseAdd = false; + if (IsSigned) { + // For SCMP: only if X is known to never be INT_MIN (to avoid overflow) + if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS) + .getSignedMinValue() + .isMinSignedValue()) { + CanUseAdd = true; + } + } else { + // For UCMP: only if X is known to never be zero + if (DAG.isKnownNeverZero(SubRHS)) { + CanUseAdd = true; + } + } + + if (CanUseAdd) { + Opcode = ARMISD::ADDC; + RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of + // LHS - (0 - X) + } + } + } + + // Generate the operation with flags + SDValue OpWithFlags; + if (Opcode == ARMISD::ADDC) { + // Use ADDC: LHS + RHS (where RHS was 0 - X, now X) + OpWithFlags = DAG.getNode(ARMISD::ADDC, dl, + DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + } else { + // Use ARMISD::SUBC to generate SUBS instruction (subtract with flags) + OpWithFlags = DAG.getNode(ARMISD::SUBC, dl, + DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + } + + SDValue OpResult = OpWithFlags.getValue(0); // The operation result + SDValue Flags = OpWithFlags.getValue(1); // The flags + + // Constants for conditional moves + SDValue One = DAG.getConstant(1, dl, MVT::i32); + SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32); + + // Select condition codes based on signed vs unsigned + ARMCC::CondCodes GTCond = IsSigned ? ARMCC::GT : ARMCC::HI; + ARMCC::CondCodes LTCond = IsSigned ? 
ARMCC::LT : ARMCC::LO; + + // First conditional move: if greater than, set to 1 + SDValue GTCondValue = DAG.getConstant(GTCond, dl, MVT::i32); + SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One, + GTCondValue, Flags); + + // Second conditional move: if less than, set to -1 + SDValue LTCondValue = DAG.getConstant(LTCond, dl, MVT::i32); + SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne, + LTCondValue, Flags); + + if (Op.getValueType() != MVT::i32) + Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType()); + + return Result2; +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { @@ -10740,6 +10877,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_BF16: return LowerFP_TO_BF16(Op, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); + case ISD::UCMP: + case ISD::SCMP: + return LowerCMP(Op, DAG); } } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 825145d..a84a3cb 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -607,6 +607,8 @@ class VectorType; bool preferZeroCompareBranch() const override { return true; } + bool shouldExpandCmpUsingSelects(EVT VT) const override; + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; bool hasAndNotCompare(SDValue V) const override { @@ -904,6 +906,7 @@ class VectorType; void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const; SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const; Register getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp index fda9d97..ca5d27d 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp @@ -254,7 +254,8 @@ bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN); F.setVarFixups({Fixup}); F.setLinkerRelaxable(); - F.getParent()->setLinkerRelaxable(); + if (!F.getParent()->isLinkerRelaxable()) + F.getParent()->setFirstLinkerRelaxable(F.getLayoutOrder()); return true; } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 76dca47..f123040 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, SpillsKnownBit = true; break; default: + // When spilling a CR bit, the super register may not be explicitly defined + // (i.e. it can be defined by a CR-logical that only defines the subreg) so + // we state that the CR field is undef. Also, in order to preserve the kill + // flag on the CR bit, we add it as an implicit use. + // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all // bits (specifically, it produces a -1 if the CR bit is set). Ultimately, // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit // register), and SETNBC will set this. if (Subtarget.isISA3_1()) { BuildMI(MBB, II, dl, TII.get(LP64 ? 
PPC::SETNBC8 : PPC::SETNBC), Reg) - .addReg(SrcReg, RegState::Undef); + .addReg(SrcReg, RegState::Undef) + .addReg(SrcReg, RegState::Implicit | + getKillRegState(MI.getOperand(0).isKill())); break; } @@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT || SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) { BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg) - .addReg(getCRFromCRBit(SrcReg), RegState::Undef); + .addReg(getCRFromCRBit(SrcReg), RegState::Undef) + .addReg(SrcReg, RegState::Implicit | + getKillRegState(MI.getOperand(0).isKill())); break; } } // We need to move the CR field that contains the CR bit we are spilling. - // The super register may not be explicitly defined (i.e. it can be defined - // by a CR-logical that only defines the subreg) so we state that the CR - // field is undef. Also, in order to preserve the kill flag on the CR bit, - // we add it as an implicit use. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) .addReg(getCRFromCRBit(SrcReg), RegState::Undef) .addReg(SrcReg, diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 67cc01e..e0ac591 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -674,6 +674,9 @@ static constexpr FeatureBitset XAndesGroup = { static constexpr DecoderListEntry DecoderList32[]{ // Vendor Extensions + {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"}, + {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"}, + {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"}, {DecoderTableXVentana32, {RISCV::FeatureVendorXVentanaCondOps}, "XVentanaCondOps"}, @@ -690,9 +693,6 @@ static constexpr DecoderListEntry DecoderList32[]{ "MIPS mips.pref"}, {DecoderTableXAndes32, XAndesGroup, "Andes extensions"}, // Standard Extensions - {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"}, - {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"}, - {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"}, {DecoderTable32, {}, "standard 32-bit instructions"}, {DecoderTableRV32Only32, {}, "RV32-only standard 32-bit instructions"}, {DecoderTableZfinx32, {}, "Zfinx (Float in Integer)"}, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index a997ea5..8d956ce 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -32,6 +32,11 @@ static cl::opt<bool> ULEB128Reloc( "riscv-uleb128-reloc", cl::init(true), cl::Hidden, cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate")); +static cl::opt<bool> + AlignRvc("riscv-align-rvc", cl::init(true), cl::Hidden, + cl::desc("When generating R_RISCV_ALIGN, insert $alignment-2 " + "bytes of NOPs even in norvc code")); + RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, const MCTargetOptions &Options) : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI), @@ -306,12 +311,21 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst, // If conditions are met, compute the padding size and create a fixup encoding // the padding size in the addend. 
bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { - // Use default handling unless linker relaxation is enabled and the alignment - // is larger than the nop size. - const MCSubtargetInfo *STI = F.getSubtargetInfo(); - if (!STI->hasFeature(RISCV::FeatureRelax)) + // Alignments before the first linker-relaxable instruction have fixed sizes + // and do not require relocations. Alignments after a linker-relaxable + // instruction require a relocation, even if the STI specifies norelax. + // + // firstLinkerRelaxable is the layout order within the subsection, which may + // be smaller than the section's order. Therefore, alignments in a + // lower-numbered subsection may be unnecessarily treated as linker-relaxable. + auto *Sec = F.getParent(); + if (F.getLayoutOrder() <= Sec->firstLinkerRelaxable()) return false; - unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; + + // Use default handling unless the alignment is larger than the nop size. + const MCSubtargetInfo *STI = F.getSubtargetInfo(); + unsigned MinNopLen = + AlignRvc || STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; if (F.getAlignment() <= MinNopLen) return false; @@ -321,7 +335,6 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN); F.setVarFixups({Fixup}); F.setLinkerRelaxable(); - F.getParent()->setLinkerRelaxable(); return true; } @@ -474,8 +487,9 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, // TODO: emit a mapping symbol right here if (Count % 4 == 2) { - // The canonical nop with Zca is c.nop. - OS.write(STI->hasFeature(RISCV::FeatureStdExtZca) ? "\x01\0" : "\0\0", 2); + // The canonical nop with Zca is c.nop. For .balign 4, we generate a 2-byte + // c.nop even in a norvc region. + OS.write("\x01\0", 2); Count -= 2; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 05d504c..6a1f4b3 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -114,6 +114,9 @@ public: bool enableScalableVectorization() const override { return ST->hasVInstructions(); } + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override { + return ST->hasVInstructions(); + } TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override { return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp index 74aec4f..2b34f61 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -359,18 +359,15 @@ static void lowerExpectAssume(IntrinsicInst *II) { } } -static bool toSpvOverloadedIntrinsic(IntrinsicInst *II, Intrinsic::ID NewID, - ArrayRef<unsigned> OpNos) { - Function *F = nullptr; - if (OpNos.empty()) { - F = Intrinsic::getOrInsertDeclaration(II->getModule(), NewID); - } else { - SmallVector<Type *, 4> Tys; - for (unsigned OpNo : OpNos) - Tys.push_back(II->getOperand(OpNo)->getType()); - F = Intrinsic::getOrInsertDeclaration(II->getModule(), NewID, Tys); - } - II->setCalledFunction(F); +static bool toSpvLifetimeIntrinsic(IntrinsicInst *II, Intrinsic::ID NewID) { + IRBuilder<> Builder(II); + auto *Alloca = cast<AllocaInst>(II->getArgOperand(0)); + std::optional<TypeSize> Size = + Alloca->getAllocationSize(Alloca->getDataLayout()); + Value *SizeVal = Builder.getInt64(Size ? 
*Size : -1); + Builder.CreateIntrinsic(NewID, Alloca->getType(), + {SizeVal, II->getArgOperand(0)}); + II->eraseFromParent(); return true; } @@ -406,8 +403,8 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { break; case Intrinsic::lifetime_start: if (!STI.isShader()) { - Changed |= toSpvOverloadedIntrinsic( - II, Intrinsic::SPVIntrinsics::spv_lifetime_start, {1}); + Changed |= toSpvLifetimeIntrinsic( + II, Intrinsic::SPVIntrinsics::spv_lifetime_start); } else { II->eraseFromParent(); Changed = true; @@ -415,8 +412,8 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { break; case Intrinsic::lifetime_end: if (!STI.isShader()) { - Changed |= toSpvOverloadedIntrinsic( - II, Intrinsic::SPVIntrinsics::spv_lifetime_end, {1}); + Changed |= toSpvLifetimeIntrinsic( + II, Intrinsic::SPVIntrinsics::spv_lifetime_end); } else { II->eraseFromParent(); Changed = true; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index f32c9bd..2611c29 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -436,20 +436,6 @@ bool SystemZTTIImpl::isLSRCostLess( C2.ScaleCost, C2.SetupCost); } -bool SystemZTTIImpl::areInlineCompatible(const Function *Caller, - const Function *Callee) const { - const TargetMachine &TM = getTLI()->getTargetMachine(); - - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); - - // Support only equal feature bitsets. Restriction should be relaxed in the - // future to allow inlining when callee's bits are subset of the caller's. - return CallerBits == CalleeBits; -} - unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const { bool Vector = (ClassID == 1); if (!Vector) diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index dc5736e..fc681de 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -65,9 +65,6 @@ public: bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override; - bool areInlineCompatible(const Function *Caller, - const Function *Callee) const override; - /// @} /// \name Vector TTI Implementations diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 3320508..b775c43 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -1821,7 +1821,7 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, // only used outside the region. if (Valid && Lifetimes.size() != 0) { auto *NewLifetime = Lifetimes[0]->clone(); - NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI); + NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(0), AI); NewLifetime->insertBefore(DomBB->getTerminator()->getIterator()); // All the outsided lifetime.start markers are no longer necessary. 
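A recurring pattern in the hunks above and below (AArch64StackTagging, the SPIRV lowering, AddressSanitizer, InlineFunction): now that the marker no longer carries a size, passes that still need a byte count recover it from the alloca itself via getAllocationSize(). A small sketch of that lookup, assuming the marker's pointer operand is an alloca or poison as the Verifier change requires; the helper name is illustrative:

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include <cassert>
  #include <cstdint>
  #include <optional>

  using namespace llvm;

  // Byte size covered by a lifetime marker, or std::nullopt if the size is
  // scalable or otherwise unknown (e.g. the pointer operand is poison).
  static std::optional<uint64_t> lifetimeMarkerSize(const IntrinsicInst &II) {
    assert(II.isLifetimeStartOrEnd() && "expected llvm.lifetime.start/end");
    const auto *AI = dyn_cast<AllocaInst>(II.getArgOperand(0));
    if (!AI)
      return std::nullopt;
    std::optional<TypeSize> Size = AI->getAllocationSize(AI->getDataLayout());
    if (!Size || Size->isScalable())
      return std::nullopt;
    return Size->getFixedValue();
  }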
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp index da60f52..6ed3b62 100644 --- a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp +++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp @@ -226,13 +226,6 @@ public: /*IsVarArgs=*/false); } - static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL, - AllocaInst *Alloced) { - std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL); - uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0; - return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt); - } - bool expansionApplicableToFunction(Module &M, Function *F) { if (F->isIntrinsic() || !F->isVarArg() || F->hasFnAttribute(Attribute::Naked)) @@ -577,8 +570,7 @@ ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder, AllocaInst *VaListInstance = Builder.CreateAlloca(VaListTy, nullptr, "va_start"); - Builder.CreateLifetimeStart(VaListInstance, - sizeOfAlloca(Ctx, DL, VaListInstance)); + Builder.CreateLifetimeStart(VaListInstance); Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)}, {VaListInstance}); @@ -595,8 +587,7 @@ ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder, Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)}, {VaListInstance}); - Builder.CreateLifetimeEnd(VaListInstance, - sizeOfAlloca(Ctx, DL, VaListInstance)); + Builder.CreateLifetimeEnd(VaListInstance); if (Result->getType()->isVoidTy()) Builder.CreateRetVoid(); @@ -746,7 +737,7 @@ bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, // Initialize the fields in the struct Builder.SetInsertPoint(CB); - Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced)); + Builder.CreateLifetimeStart(Alloced); Frame.initializeStructAlloca(DL, Builder, Alloced); const unsigned NumArgs = FuncType->getNumParams(); @@ -762,7 +753,7 @@ bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, Builder.SetCurrentDebugLocation(CB->getStableDebugLoc()); VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument"); Builder.SetInsertPoint(CB); - Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList)); + Builder.CreateLifetimeStart(VaList); } Builder.SetInsertPoint(CB); Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced)); @@ -802,9 +793,9 @@ bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, } if (VaList) - Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList)); + Builder.CreateLifetimeEnd(VaList); - Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced)); + Builder.CreateLifetimeEnd(Alloced); NewCB->setAttributes(PAL); NewCB->takeName(CB); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 47e017e..d7a2ef7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1532,6 +1532,51 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V, return nullptr; } +/// Helper to match idempotent binary intrinsics, namely, intrinsics where +/// `f(f(x, y), y) == f(x, y)` holds. 
+static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) { + switch (IID) { + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::umax: + case Intrinsic::umin: + case Intrinsic::maximum: + case Intrinsic::minimum: + case Intrinsic::maximumnum: + case Intrinsic::minimumnum: + case Intrinsic::maxnum: + case Intrinsic::minnum: + return true; + default: + return false; + } +} + +/// Attempt to simplify value-accumulating recurrences of kind: +/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ] +/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b) +/// And let the idempotent binary intrinsic be hoisted, when the operands are +/// known to be loop-invariant. +static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, + IntrinsicInst *II) { + PHINode *PN; + Value *Init, *OtherOp; + + // A binary intrinsic recurrence with loop-invariant operands is equivalent to + // `call @llvm.binary.intrinsic(Init, OtherOp)`. + auto IID = II->getIntrinsicID(); + if (!isIdempotentBinaryIntrinsic(IID) || + !matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) || + !IC.getDominatorTree().dominates(OtherOp, PN)) + return nullptr; + + auto *InvariantBinaryInst = + IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp); + if (isa<FPMathOperator>(InvariantBinaryInst)) + cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II); + return InvariantBinaryInst; +} + static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) { if (!CanReorderLanes) return nullptr; @@ -3912,6 +3957,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Value *Reverse = foldReversedIntrinsicOperands(II)) return replaceInstUsesWith(*II, Reverse); + if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(*this, II)) + return replaceInstUsesWith(*II, Res); + // Some intrinsics (like experimental_gc_statepoint) can be used in invoke // context, so it is handled in visitCallBase and we should trigger it. return visitCallBase(*II); diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8da65c5..50258af 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1211,23 +1211,19 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { return; if (!II.isLifetimeStartOrEnd()) return; - // Found lifetime intrinsic, add ASan instrumentation if necessary. - auto *Size = cast<ConstantInt>(II.getArgOperand(0)); - // If size argument is undefined, don't do anything. - if (Size->isMinusOne()) return; - // Check that size doesn't saturate uint64_t and can - // be stored in IntptrTy. - const uint64_t SizeValue = Size->getValue().getLimitedValue(); - if (SizeValue == ~0ULL || - !ConstantInt::isValueValidForType(IntptrTy, SizeValue)) - return; // Find alloca instruction that corresponds to llvm.lifetime argument. - AllocaInst *AI = dyn_cast<AllocaInst>(II.getArgOperand(1)); + AllocaInst *AI = dyn_cast<AllocaInst>(II.getArgOperand(0)); // We're interested only in allocas we can handle. if (!AI || !ASan.isInterestingAlloca(*AI)) return; + + std::optional<TypeSize> Size = AI->getAllocationSize(AI->getDataLayout()); + // Check that size is known and can be stored in IntptrTy. 
+ if (!Size || !ConstantInt::isValueValidForType(IntptrTy, *Size)) + return; + bool DoPoison = (ID == Intrinsic::lifetime_end); - AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison}; + AllocaPoisonCall APC = {&II, AI, *Size, DoPoison}; if (AI->isStaticAlloca()) StaticAllocaPoisonCallVec.push_back(APC); else if (ClInstrumentDynamicAllocas) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index bcb90d6..fc34d14 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -1469,22 +1469,6 @@ void HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo, size_t Size = memtag::getAllocaSizeInBytes(*AI); size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); - auto HandleLifetime = [&](IntrinsicInst *II) { - // Set the lifetime intrinsic to cover the whole alloca. This reduces the - // set of assumptions we need to make about the lifetime. Without this we - // would need to ensure that we can track the lifetime pointer to a - // constant offset from the alloca, and would still need to change the - // size to include the extra alignment we use for the untagging to make - // the size consistent. - // - // The check for standard lifetime below makes sure that we have exactly - // one set of start / end in any execution (i.e. the ends are not - // reachable from each other), so this will not cause any problems. - II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize)); - }; - llvm::for_each(Info.LifetimeStart, HandleLifetime); - llvm::for_each(Info.LifetimeEnd, HandleLifetime); - AI->replaceUsesWithIf(Replacement, [AILong](const Use &U) { auto *User = U.getUser(); return User != AILong && !isa<LifetimeIntrinsic>(User); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 7d3c940..6e81387 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3301,7 +3301,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { void handleLifetimeStart(IntrinsicInst &I) { if (!PoisonStack) return; - AllocaInst *AI = dyn_cast<AllocaInst>(I.getArgOperand(1)); + AllocaInst *AI = dyn_cast<AllocaInst>(I.getArgOperand(0)); if (AI) LifetimeStartList.push_back(std::make_pair(&I, AI)); } diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index 4edf25c..9471ae3 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -818,12 +818,12 @@ bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase, } } } else if (auto *II = dyn_cast<LifetimeIntrinsic>(I)) { - auto *AI = dyn_cast<AllocaInst>(II->getArgOperand(1)); + auto *AI = dyn_cast<AllocaInst>(II->getArgOperand(0)); if (!AI) return false; Size = GetAllocaSize(AI); - Dest = II->getArgOperand(1); + Dest = II->getArgOperand(0); } else if (auto *AI = dyn_cast<AllocaInst>(I)) { // We need to clear the types for new stack allocations (or else we might // read stale type information from a previous function execution). 
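Stepping back to the InstCombineCalls.cpp change above: the new fold rewrites a value-accumulating recurrence over an idempotent binary intrinsic into a single call, because f(f(x, y), y) == f(x, y) means repeatedly applying the intrinsic to a loop-invariant operand never changes the result after the first application. A standalone sketch of that equivalence using umax, written as plain C++ rather than InstCombine code (the loop must run at least once for the recurrence value to exist):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>

  // Loop form of the recurrence, mirroring
  //   %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
  //   %umax     = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
  static uint8_t umaxRecurrence(uint8_t A, uint8_t B, unsigned TripCount) {
    uint8_t Acc = A;
    for (unsigned I = 0; I < TripCount; ++I)
      Acc = std::max(Acc, B); // applying umax again never changes the result
    return Acc;
  }

  int main() {
    // The fold replaces the whole recurrence with umax(Init, OtherOp).
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned B = 0; B < 256; ++B)
        assert(umaxRecurrence(A, B, 7) ==
               std::max(static_cast<uint8_t>(A), static_cast<uint8_t>(B)));
    return 0;
  }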
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 9b87180..f46d54b 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1363,7 +1363,7 @@ struct DSEState { if (auto *CB = dyn_cast<CallBase>(I)) { if (CB->getIntrinsicID() == Intrinsic::lifetime_end) return { - std::make_pair(MemoryLocation::getForArgument(CB, 1, &TLI), false)}; + std::make_pair(MemoryLocation::getForArgument(CB, 0, &TLI), false)}; if (Value *FreedOp = getFreedOperand(CB, &TLI)) return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)}; } diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 85ee824..a097d33 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -434,7 +434,7 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, NewV = NewV->stripPointerCasts(); Function *NewDecl = Intrinsic::getOrInsertDeclaration( M, II->getIntrinsicID(), {NewV->getType()}); - II->setArgOperand(1, NewV); + II->setArgOperand(0, NewV); II->setCalledFunction(NewDecl); return true; } @@ -491,7 +491,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { - appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1), + appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0), PostorderStack, Visited); break; } diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index b3bffeb..fcdb8a9 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -2166,7 +2166,7 @@ public: // If the loads don't alias the lifetime.end, it won't interfere with // fusion. - MemoryLocation EndLoc = MemoryLocation::getForArgument(End, 1, nullptr); + MemoryLocation EndLoc = MemoryLocation::getForArgument(End, 0, nullptr); if (!EndLoc.Ptr) continue; if (AA->isNoAlias(Load0Loc, EndLoc) && AA->isNoAlias(Load1Loc, EndLoc)) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 79721dc..f237322 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -915,7 +915,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // move the bitcast as well, which we don't handle. if (SkippedLifetimeStart) { auto *LifetimeArg = - dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1)); + dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(0)); if (LifetimeArg && LifetimeArg->getParent() == C->getParent() && C->comesBefore(LifetimeArg)) return false; @@ -1010,7 +1010,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Lifetime of srcAlloca ends at lifetime.end. 
       if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
         if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
-            II->getArgOperand(1) == srcAlloca)
+            II->getArgOperand(0) == srcAlloca)
           break;
       }
 
@@ -1393,7 +1393,7 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
     if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst()))
       if (II->getIntrinsicID() == Intrinsic::lifetime_start)
         if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V)))
-          return II->getArgOperand(1) == Alloca;
+          return II->getArgOperand(0) == Alloca;
 
   return false;
 }
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 1a52af1..40eeeb2 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1535,7 +1535,7 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
 
   if (auto *II = dyn_cast<IntrinsicInst>(DepInst)) {
     if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
-      auto *LifetimePtr = II->getOperand(1);
+      auto *LifetimePtr = II->getOperand(0);
       if (LoadPtr == lookupOperandLeader(LifetimePtr) ||
           AA->isMustAlias(LoadPtr, LifetimePtr))
         return createConstantExpression(UndefValue::get(LoadType));
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 03d9f32..d6e27aa 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1260,10 +1260,7 @@ private:
       return PI.setAborted(&II);
 
     if (II.isLifetimeStartOrEnd()) {
-      ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
-      uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
-                               Length->getLimitedValue());
-      insertUse(II, Offset, Size, true);
+      insertUse(II, Offset, AllocSize, true);
       return;
     }
 
@@ -3614,30 +3611,14 @@ private:
       return true;
     }
 
-    assert(II.getArgOperand(1) == OldPtr);
-    // Lifetime intrinsics are only promotable if they cover the whole alloca.
-    // Therefore, we drop lifetime intrinsics which don't cover the whole
-    // alloca.
-    // (In theory, intrinsics which partially cover an alloca could be
-    // promoted, but PromoteMemToReg doesn't handle that case.)
-    // FIXME: Check whether the alloca is promotable before dropping the
-    // lifetime intrinsics?
-    if (NewBeginOffset != NewAllocaBeginOffset ||
-        NewEndOffset != NewAllocaEndOffset)
-      return true;
-
-    ConstantInt *Size =
-        ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
-                         NewEndOffset - NewBeginOffset);
-    // Lifetime intrinsics always expect an i8* so directly get such a pointer
-    // for the new alloca slice.
+    assert(II.getArgOperand(0) == OldPtr);
     Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
     Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
     Value *New;
     if (II.getIntrinsicID() == Intrinsic::lifetime_start)
-      New = IRB.CreateLifetimeStart(Ptr, Size);
+      New = IRB.CreateLifetimeStart(Ptr);
     else
-      New = IRB.CreateLifetimeEnd(Ptr, Size);
+      New = IRB.CreateLifetimeEnd(Ptr);
 
     (void)New;
     LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7a9dd37..bbd1ed6 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1099,7 +1099,7 @@ static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
       // Get the memory operand of the lifetime marker. If the underlying
      // object is a sunk alloca, or is otherwise defined in the extraction
      // region, the lifetime marker must not be erased.
-      Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+      Value *Mem = II->getOperand(0);
       if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
         continue;
 
@@ -1115,8 +1115,6 @@ static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
 static void insertLifetimeMarkersSurroundingCall(
     Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
     CallInst *TheCall) {
-  LLVMContext &Ctx = M->getContext();
-  auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
   Instruction *Term = TheCall->getParent()->getTerminator();
 
   // Emit lifetime markers for the pointers given in \p Objects. Insert the
@@ -1130,7 +1128,7 @@ static void insertLifetimeMarkersSurroundingCall(
     Function *Func =
         Intrinsic::getOrInsertDeclaration(M, MarkerFunc, Mem->getType());
 
-    auto Marker = CallInst::Create(Func, {NegativeOne, Mem});
+    auto Marker = CallInst::Create(Func, Mem);
     if (InsertBefore)
       Marker->insertBefore(TheCall->getIterator());
     else
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 59a47a9..fa3c467 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -3004,31 +3004,11 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
       if (hasLifetimeMarkers(AI))
         continue;
 
-      // Try to determine the size of the allocation.
-      ConstantInt *AllocaSize = nullptr;
-      if (ConstantInt *AIArraySize =
-              dyn_cast<ConstantInt>(AI->getArraySize())) {
-        auto &DL = Caller->getDataLayout();
-        Type *AllocaType = AI->getAllocatedType();
-        TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
-        uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
-
-        // Don't add markers for zero-sized allocas.
-        if (AllocaArraySize == 0)
-          continue;
-
-        // Check that array size doesn't saturate uint64_t and doesn't
-        // overflow when it's multiplied by type size.
-        if (!AllocaTypeSize.isScalable() &&
-            AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
-            std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
-                AllocaTypeSize.getFixedValue()) {
-          AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
-                                        AllocaArraySize * AllocaTypeSize);
-        }
-      }
+      std::optional<TypeSize> Size = AI->getAllocationSize(AI->getDataLayout());
+      if (Size && Size->isZero())
+        continue;
 
-      builder.CreateLifetimeStart(AI, AllocaSize);
+      builder.CreateLifetimeStart(AI);
       for (ReturnInst *RI : Returns) {
         // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
         // call and a return. The return kills all local allocas.
@@ -3038,7 +3018,7 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
         if (InlinedDeoptimizeCalls &&
             RI->getParent()->getTerminatingDeoptimizeCall())
           continue;
-        IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
+        IRBuilder<>(RI).CreateLifetimeEnd(AI);
       }
     }
   }
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 2619e73..b559212 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -481,7 +481,7 @@ bool llvm::wouldInstructionBeTriviallyDead(const Instruction *I,
       return true;
 
     if (II->isLifetimeStartOrEnd()) {
-      auto *Arg = II->getArgOperand(1);
+      auto *Arg = II->getArgOperand(0);
       if (isa<PoisonValue>(Arg))
         return true;
diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
index 472c03f..1f59b17 100644
--- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
+++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -155,7 +155,7 @@ void StackInfoBuilder::visit(OptimizationRemarkEmitter &ORE,
     return;
   }
   if (auto *II = dyn_cast<LifetimeIntrinsic>(&Inst)) {
-    AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(1));
+    AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(0));
     if (!AI ||
         getAllocaInterestingness(*AI) != AllocaInterestingness::kInteresting)
       return;
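For illustration only (not part of the patch): every hunk above either moves the alloca pointer from operand 1 to operand 0 of llvm.lifetime.start/end or drops the explicit size argument, since a marker now always describes the whole alloca. A minimal, self-contained C++ sketch of emitting lifetime markers through the one-operand IRBuilder API follows; the file name, function name, and module name are hypothetical, and header locations may vary between LLVM versions.

    // lifetime_demo.cpp -- hypothetical standalone sketch, assumes an LLVM
    // build that carries the pointer-only lifetime intrinsic change above.
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("lifetime_demo", Ctx);
      IRBuilder<> B(Ctx);

      // Build `void @demo()` with a single stack slot.
      Function *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                                     Function::ExternalLinkage, "demo", M);
      B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
      AllocaInst *AI = B.CreateAlloca(B.getInt32Ty(), nullptr, "x");

      // Only the pointer is passed; the marker covers the entire alloca, so
      // there is no i64 size operand and the alloca is argument 0.
      B.CreateLifetimeStart(AI);
      B.CreateLifetimeEnd(AI);
      B.CreateRetVoid();

      // Print the module so the emitted lifetime calls can be inspected.
      M.print(outs(), nullptr);
      return 0;
    }

The same whole-alloca invariant is what lets the SROA, HWAddressSanitizer, and InlineFunction hunks delete their size-computation and size-rewriting logic: passes that previously clamped or rewrote the size operand now only need the pointer.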