diff options
Diffstat (limited to 'llvm/lib')
55 files changed, 803 insertions, 362 deletions
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index b8c540c..9f8ac6e 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -849,17 +849,12 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, /// %sum.2 = select %cmp, %add, %sum.1 RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) { - SelectInst *SI = dyn_cast<SelectInst>(I); - if (!SI) - return InstDesc(false, I); - - CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition()); + Value *TrueVal, *FalseVal; // Only handle single use cases for now. - if (!CI || !CI->hasOneUse()) + if (!match(I, + m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal)))) return InstDesc(false, I); - Value *TrueVal = SI->getTrueValue(); - Value *FalseVal = SI->getFalseValue(); // Handle only when either of operands of select instruction is a PHI // node for now. if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) || @@ -886,7 +881,7 @@ RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) { if (!IPhi || IPhi != FalseVal) return InstDesc(false, I); - return InstDesc(true, SI); + return InstDesc(true, I); } RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4e38626..e08ef60 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6644,7 +6644,7 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, "Invalid mask width"); // If index-width (mask size) is less than pointer-size then mask is // 1-extended. 
- if (match(Op1, m_PtrToInt(m_Specific(Op0)))) + if (match(Op1, m_PtrToIntOrAddr(m_Specific(Op0)))) return Op0; // NOTE: We may have attributes associated with the return value of the diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index ab37338..0b2e3fc 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -393,7 +393,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, /// \param AA The AliasAnalysis we used for our search. /// \param AllowImpreciseClobber Always false, unless we do relaxed verify. -LLVM_ATTRIBUTE_UNUSED static void +[[maybe_unused]] static void checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, const MemoryLocation &StartLoc, const MemorySSA &MSSA, const UpwardsMemoryQuery &Query, BatchAAResults &AA, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 3fab6b0..a64b93d 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6417,8 +6417,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S, case scSequentialUMinExpr: return GetGCDMultiple(cast<SCEVNAryExpr>(S)); case scUnknown: { - // ask ValueTracking for known bits + // Ask ValueTracking for known bits. SCEVUnknown only become available at + // the point their underlying IR instruction has been defined. If CtxI was + // not provided, use: + // * the first instruction in the entry block if it is an argument + // * the instruction itself otherwise. 
const SCEVUnknown *U = cast<SCEVUnknown>(S); + if (!CtxI) { + if (isa<Argument>(U->getValue())) + CtxI = &*F.getEntryBlock().begin(); + else if (auto *I = dyn_cast<Instruction>(U->getValue())) + CtxI = I; + } unsigned Known = computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT) .countMinTrailingZeros(); @@ -15761,6 +15771,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock( const SCEV *OneAlignedUp = GetNextSCEVDividesByDivisor(One, DividesBy); To = SE.getUMaxExpr(FromRewritten, OneAlignedUp); + } else { + if (LHS->getType()->isPointerTy()) { + LHS = SE.getLosslessPtrToIntExpr(LHS); + RHS = SE.getLosslessPtrToIntExpr(RHS); + if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS)) + break; + } + auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) { + const SCEV *Sub = SE.getMinusSCEV(A, B); + AddRewrite(Sub, Sub, + SE.getUMaxExpr(Sub, SE.getOne(From->getType()))); + }; + AddSubRewrite(LHS, RHS); + AddSubRewrite(RHS, LHS); + continue; } break; default: diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index 1f751ee..e7f0b2c 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -60,6 +60,36 @@ void StaticDataProfileInfo::addConstantProfileCount( OriginalCount = getInstrMaxCountValue(); } +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getConstantHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const { + // The accumulated counter shows the constant is hot. Return enum 'hot' + // whether this variable is seen by unprofiled functions or not. + if (PSI->isHotCount(Count)) + return StaticDataHotness::Hot; + // The constant is not hot, and seen by unprofiled functions. We don't want to + // assign it to unlikely sections, even if the counter says 'cold'. So return + // enum 'LukewarmOrUnknown'.
+ if (ConstantWithoutCounts.count(C)) + return StaticDataHotness::LukewarmOrUnknown; + // The accumulated counter shows the constant is cold so return enum 'cold'. + if (PSI->isColdCount(Count)) + return StaticDataHotness::Cold; + + return StaticDataHotness::LukewarmOrUnknown; +} + +StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const { + switch (Hotness) { + case StaticDataHotness::Cold: + return "unlikely"; + case StaticDataHotness::Hot: + return "hot"; + default: + return ""; + } +} + std::optional<uint64_t> StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { auto I = ConstantProfileCounts.find(C); @@ -70,23 +100,10 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { StringRef StaticDataProfileInfo::getConstantSectionPrefix( const Constant *C, const ProfileSummaryInfo *PSI) const { - auto Count = getConstantProfileCount(C); + std::optional<uint64_t> Count = getConstantProfileCount(C); if (!Count) return ""; - // The accummulated counter shows the constant is hot. Return 'hot' whether - // this variable is seen by unprofiled functions or not. - if (PSI->isHotCount(*Count)) - return "hot"; - // The constant is not hot, and seen by unprofiled functions. We don't want to - // assign it to unlikely sections, even if the counter says 'cold'. So return - // an empty prefix before checking whether the counter is cold. - if (ConstantWithoutCounts.count(C)) - return ""; - // The accummulated counter shows the constant is cold. Return 'unlikely'. - if (PSI->isColdCount(*Count)) - return "unlikely"; - // The counter says lukewarm. Return an empty prefix.
- return ""; + return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count)); } bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 219bbc9..05fffe9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1437,7 +1437,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 1, static_cast<bool>(BBAddrMapSkipEmitBBEntries), - HasCalls}; + HasCalls, + false}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 4931403..53f1cfe2 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -770,7 +770,7 @@ struct PartwordMaskValues { Value *Inv_Mask = nullptr; }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { auto PrintObj = [&O](auto *V) { if (V) diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 3812823..04d9309 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -112,7 +112,7 @@ APInt GISelValueTracking::getKnownOnes(Register R) { return getKnownBits(R).One; } -LLVM_ATTRIBUTE_UNUSED static void +[[maybe_unused]] static void dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" @@ -2013,6 +2013,43 @@ unsigned GISelValueTracking::computeNumSignBits(Register R, FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; break; } + case TargetOpcode::G_ADD: { + Register Src2 = 
MI.getOperand(2).getReg(); + unsigned Src2NumSignBits = + computeNumSignBits(Src2, DemandedElts, Depth + 1); + if (Src2NumSignBits <= 2) + return 1; // Early out. + + Register Src1 = MI.getOperand(1).getReg(); + unsigned Src1NumSignBits = + computeNumSignBits(Src1, DemandedElts, Depth + 1); + if (Src1NumSignBits == 1) + return 1; // Early Out. + + // Special case decrementing a value (ADD X, -1): + KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth); + if (Known2.isAllOnes()) { + KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((Known1.Zero | 1).isAllOnes()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known1.isNonNegative()) { + FirstAnswer = Src1NumSignBits; + break; + } + + // Otherwise, we treat this like an ADD. + } + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; + break; + } case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { bool IsFP = Opcode == TargetOpcode::G_FCMP; diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 0e38017..d2f2c3e 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -661,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR, void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl<SlotIndex> *EndPoints) { LiveQueryResult LRQ = LR.Query(Kill); - VNInfo *VNI = LRQ.valueOutOrDead(); + // LR may have liveness reachable from early clobber slot, which may be + // only live-in instead of live-out of the instruction. + // For example, LR =[1r, 3r), Kill = 3e, we have to prune [3e, 3r) of LR. + VNInfo *VNI = LRQ.valueOutOrDead() ? 
LRQ.valueOutOrDead() : LRQ.valueIn(); if (!VNI) return; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 3268c26..9662511 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1551,7 +1551,7 @@ LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e153842..358e060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -658,13 +658,13 @@ namespace { bool InexpensiveOnly = false, std::optional<EVT> OutVT = std::nullopt); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); + SDValue buildRsqrtEstimate(SDValue Op); + SDValue buildSqrtEstimate(SDValue Op); + SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip); SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -5044,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { unsigned Opc = N->getOpcode(); bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); - ConstantSDNode *N1C = isConstOrConstSplat(N1); // X / undef -> undef // X % undef -> undef @@ -5076,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, 
SelectionDAG &DAG) { // division-by-zero or remainder-by-zero, so assume the divisor is 1. // TODO: Similarly, if we're zero-extending a boolean divisor, then assume // it's a 1. - if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1)) return IsDiv ? N0 : DAG.getConstant(0, DL, VT); return SDValue(); @@ -18591,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0))) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -18636,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); SDValue AAZ = DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); - if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ)) return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); // Estimate creation failed. Clean up speculatively created nodes. 
@@ -18646,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) - if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) { SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); @@ -18743,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. + SelectionDAG::FlagInserter FlagInserter(DAG, Flags); // TODO: If this is N0/sqrt(N0), and we reach this node before trying to // transform the fdiv, we may produce a sub-optimal estimate sequence // because the reciprocal calculation may not have to filter out a // 0.0 input. - return buildSqrtEstimate(N0, Flags); + return buildSqrtEstimate(N0); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -29744,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. 
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); } // If non-reciprocal square root is requested, multiply the result by Arg. if (!Reciprocal) - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); return Est; } @@ -29776,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -29790,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { - SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est); 
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est); + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -29800,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) - LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); } else { // SQRT: LHS = (A * E) * -0.5 - LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf); } - Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS); } return Est; @@ -29815,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. -SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, - bool Reciprocal) { +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) { if (LegalDAG) return SDValue(); @@ -29844,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (Iterations > 0) Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal); if (!Reciprocal) { SDLoc DL(Op); // Try the target specific test first. 
@@ -29863,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, true); +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, false); +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, false); } /// Return true if there is any possibility that the two addresses overlap. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c9aeef7..90edaf3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; case ISD::ADD: case ISD::ADDC: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. + // TODO: Move Operand 1 check before Operand 0 check Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. @@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); if (Tmp2 == 1) return 1; // Early out. + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. return std::min(Tmp, Tmp2) - 1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -6403,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, if (VT.isScalableVector()) return SDValue(); - // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be - // simplified to one big BUILD_VECTOR. 
+ // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR and + // single-element INSERT_VECTOR_ELT operands can be simplified to one big + // BUILD_VECTOR. // FIXME: Add support for SCALAR_TO_VECTOR as well. EVT SVT = VT.getScalarType(); SmallVector<SDValue, 16> Elts; @@ -6414,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); else if (Op.getOpcode() == ISD::BUILD_VECTOR) Elts.append(Op->op_begin(), Op->op_end()); + else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && + OpVT.getVectorNumElements() == 1 && + isNullConstant(Op.getOperand(2))) + Elts.push_back(Op.getOperand(1)); else return SDValue(); } diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index 7a0256f..fa39603 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -338,9 +338,13 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI, if (FilePath.empty()) { // If we had a DW_AT_decl_file, but got no file then we need to emit a // warning. + const uint64_t DwarfFileIdx = dwarf::toUnsigned( + Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX); + // Check if there is no DW_AT_decl_file attribute, and don't report an + // error if it isn't there.
+ if (DwarfFileIdx == UINT32_MAX) + return; Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) { - const uint64_t DwarfFileIdx = dwarf::toUnsigned( - Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX); OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has an invalid file index " << DwarfFileIdx << " in its DW_AT_decl_file attribute, unable to create a single " @@ -629,6 +633,10 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { size_t NumBefore = Gsym.getNumFunctionInfos(); auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false); + // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF. + if (IsMachO) + return ReturnDie; + if (DwarfUnit.getDWOId()) { DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit(); if (!DWOCU->isDWOUnit()) diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 5b3c05e..6c7e27e 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -260,22 +260,17 @@ public: } // Run finalization actions. 
- using WrapperFunctionCall = orc::shared::WrapperFunctionCall; - runFinalizeActions( - G->allocActions(), - [this, OnFinalized = std::move(OnFinalized)]( - Expected<std::vector<WrapperFunctionCall>> DeallocActions) mutable { - completeFinalization(std::move(OnFinalized), - std::move(DeallocActions)); - }); - } + auto DeallocActions = runFinalizeActions(G->allocActions()); + if (!DeallocActions) { + OnFinalized(DeallocActions.takeError()); + return; + } - void abandon(OnAbandonedFunction OnAbandoned) override { - Error Err = Error::success(); - if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); - if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments)) - Err = joinErrors(std::move(Err), errorCodeToError(EC)); + // Release the finalize segments slab. + if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) { + OnFinalized(errorCodeToError(EC)); + return; + } #ifndef NDEBUG // Set 'G' to null to flag that we've been successfully finalized. @@ -284,22 +279,17 @@ public: G = nullptr; #endif - OnAbandoned(std::move(Err)); + // Continue with finalized allocation. + OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments), + std::move(*DeallocActions))); } -private: - void completeFinalization( - OnFinalizedFunction OnFinalized, - Expected<std::vector<orc::shared::WrapperFunctionCall>> DeallocActions) { - - if (!DeallocActions) - return OnFinalized(DeallocActions.takeError()); - - // Release the finalize segments slab. 
- if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) { - OnFinalized(errorCodeToError(EC)); - return; - } + void abandon(OnAbandonedFunction OnAbandoned) override { + Error Err = Error::success(); + if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); + if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments)) + Err = joinErrors(std::move(Err), errorCodeToError(EC)); #ifndef NDEBUG // Set 'G' to null to flag that we've been successfully finalized. @@ -308,11 +298,10 @@ private: G = nullptr; #endif - // Continue with finalized allocation. - OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments), - std::move(*DeallocActions))); + OnAbandoned(std::move(Err)); } +private: Error applyProtections() { for (auto &KV : BL.segments()) { const auto &AG = KV.first; diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 7b327af..7e606c6a 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -91,19 +91,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size); } - std::vector<shared::WrapperFunctionCall> DeinitializeActions; - { - std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P; - auto F = P.get_future(); - shared::runFinalizeActions( - AI.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) { - P.set_value(std::move(R)); - }); - if (auto DeinitializeActionsOrErr = F.get()) - DeinitializeActions = std::move(*DeinitializeActionsOrErr); - else - return OnInitialized(DeinitializeActionsOrErr.takeError()); - } + auto DeinitializeActions = shared::runFinalizeActions(AI.Actions); + if (!DeinitializeActions) + return OnInitialized(DeinitializeActions.takeError()); { std::lock_guard<std::mutex> Lock(Mutex); @@ -111,7 +101,7 @@ void 
InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, // This is the maximum range whose permission have been possibly modified auto &Alloc = Allocations[MinAddr]; Alloc.Size = MaxAddr - MinAddr; - Alloc.DeinitializationActions = std::move(DeinitializeActions); + Alloc.DeinitializationActions = std::move(*DeinitializeActions); Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr); } @@ -128,10 +118,10 @@ void InProcessMemoryMapper::deinitialize( for (auto Base : llvm::reverse(Bases)) { - shared::runDeallocActions( - Allocations[Base].DeinitializationActions, [&](Error Err) { - AllErr = joinErrors(std::move(AllErr), std::move(Err)); - }); + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } // Reset protections to read/write so the area can be reused if (auto EC = sys::Memory::protectMappedMemory( diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp index 08ab0c6..91f2899 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp @@ -12,39 +12,31 @@ namespace llvm { namespace orc { namespace shared { -void runFinalizeActions(AllocActions &AAs, - OnRunFinalizeActionsCompleteFn OnComplete) { +Expected<std::vector<WrapperFunctionCall>> +runFinalizeActions(AllocActions &AAs) { std::vector<WrapperFunctionCall> DeallocActions; DeallocActions.reserve(numDeallocActions(AAs)); for (auto &AA : AAs) { if (AA.Finalize) - - if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) { - while (!DeallocActions.empty()) { - Err = joinErrors(std::move(Err), - DeallocActions.back().runWithSPSRetErrorMerged()); - DeallocActions.pop_back(); - } - return OnComplete(std::move(Err)); - } + if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) + return joinErrors(std::move(Err), 
runDeallocActions(DeallocActions)); if (AA.Dealloc) DeallocActions.push_back(std::move(AA.Dealloc)); } AAs.clear(); - OnComplete(std::move(DeallocActions)); + return DeallocActions; } -void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs, - OnRunDeallocActionsComeleteFn OnComplete) { +Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs) { Error Err = Error::success(); while (!DAs.empty()) { Err = joinErrors(std::move(Err), DAs.back().runWithSPSRetErrorMerged()); DAs = DAs.drop_back(); } - OnComplete(std::move(Err)); + return Err; } } // namespace shared diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp index 8c24b1f..4fbf232 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -9,10 +9,8 @@ #include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h" #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Process.h" #include "llvm/Support/WindowsError.h" -#include <future> #include <sstream> #if defined(LLVM_ON_UNIX) @@ -183,24 +181,15 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize( } // Run finalization actions and get deinitlization action list. 
- std::vector<shared::WrapperFunctionCall> DeinitializeActions; - { - std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P; - auto F = P.get_future(); - shared::runFinalizeActions( - FR.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) { - P.set_value(std::move(R)); - }); - if (auto DeinitializeActionsOrErr = F.get()) - DeinitializeActions = std::move(*DeinitializeActionsOrErr); - else - return DeinitializeActionsOrErr.takeError(); + auto DeinitializeActions = shared::runFinalizeActions(FR.Actions); + if (!DeinitializeActions) { + return DeinitializeActions.takeError(); } { std::lock_guard<std::mutex> Lock(Mutex); Allocations[MinAddr].DeinitializationActions = - std::move(DeinitializeActions); + std::move(*DeinitializeActions); Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr); } @@ -221,11 +210,10 @@ Error ExecutorSharedMemoryMapperService::deinitialize( std::lock_guard<std::mutex> Lock(Mutex); for (auto Base : llvm::reverse(Bases)) { - shared::runDeallocActions( - Allocations[Base].DeinitializationActions, [&](Error Err) { - if (Err) - AllErr = joinErrors(std::move(AllErr), std::move(Err)); - }); + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } // Remove the allocation from the allocation list of its reservation for (auto &Reservation : Reservations) { diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 3842b1a..6a9ef2e 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -741,7 +741,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, assert(!CI2->isZero() && "And zero handled above"); if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { // If and'ing the address of a global with a constant, fold it. 
- if (CE1->getOpcode() == Instruction::PtrToInt && + if ((CE1->getOpcode() == Instruction::PtrToInt || + CE1->getOpcode() == Instruction::PtrToAddr) && isa<GlobalValue>(CE1->getOperand(0))) { GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0)); diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp index eef8a21..6b7b4f1 100644 --- a/llvm/lib/ObjCopy/ConfigManager.cpp +++ b/llvm/lib/ObjCopy/ConfigManager.cpp @@ -122,14 +122,14 @@ ConfigManager::getDXContainerConfig() const { if (!Common.AddGnuDebugLink.empty() || !Common.SplitDWO.empty() || !Common.AllocSectionsPrefix.empty() || Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() || - !Common.DumpSection.empty() || !Common.KeepSection.empty() || - !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() || - Common.ExtractDWO || Common.OnlyKeepDebug || Common.StripAllGNU || - Common.StripDWO || Common.StripDebug || Common.StripNonAlloc || - Common.StripSections || Common.StripUnneeded || - Common.DecompressDebugSections || Common.GapFill != 0 || - Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0 || + !Common.KeepSection.empty() || !Common.SectionsToRename.empty() || + !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() || + !Common.SetSectionType.empty() || Common.ExtractDWO || + Common.OnlyKeepDebug || Common.StripAllGNU || Common.StripDWO || + Common.StripDebug || Common.StripNonAlloc || Common.StripSections || + Common.StripUnneeded || Common.DecompressDebugSections || + Common.GapFill != 0 || Common.PadTo != 0 || + Common.ChangeSectionLMAValAll != 0 || !Common.ChangeSectionAddress.empty()) { return createStringError(llvm::errc::invalid_argument, "option is not supported for DXContainer"); diff --git a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp index d7f3c0d..95ab3d9 100644 --- 
a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp +++ b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp @@ -9,8 +9,10 @@ #include "llvm/ObjCopy/DXContainer/DXContainerObjcopy.h" #include "DXContainerReader.h" #include "DXContainerWriter.h" +#include "llvm/BinaryFormat/DXContainer.h" #include "llvm/ObjCopy/CommonConfig.h" #include "llvm/ObjCopy/DXContainer/DXContainerConfig.h" +#include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -42,7 +44,47 @@ static Error extractPartAsObject(StringRef PartName, StringRef OutFilename, "part '%s' not found", PartName.str().c_str()); } +static Error dumpPartToFile(StringRef PartName, StringRef Filename, + StringRef InputFilename, Object &Obj) { + auto PartIter = llvm::find_if( + Obj.Parts, [&PartName](const Part &P) { return P.Name == PartName; }); + if (PartIter == Obj.Parts.end()) + return createFileError(Filename, + std::make_error_code(std::errc::invalid_argument), + "part '%s' not found", PartName.str().c_str()); + ArrayRef<uint8_t> Contents = PartIter->Data; + // The DXContainer format is a bit odd because the part-specific headers are + // contained inside the part data itself. For parts that contain LLVM bitcode + // when we dump the part we want to skip the part-specific header so that we + // get a valid .bc file that we can inspect. All the data contained inside the + // program header is pulled out of the bitcode, so the header can be + // reconstructed if needed from the bitcode itself. More comprehensive + // documentation on the DXContainer format can be found at + // https://llvm.org/docs/DirectX/DXContainer.html. 
+ + if (PartName == "DXIL" || PartName == "STAT") + Contents = Contents.drop_front(sizeof(llvm::dxbc::ProgramHeader)); + if (Contents.empty()) + return createFileError(Filename, object_error::parse_failed, + "part '%s' is empty", PartName.str().c_str()); + Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Filename, Contents.size()); + if (!BufferOrErr) + return createFileError(Filename, BufferOrErr.takeError()); + std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); + llvm::copy(Contents, Buf->getBufferStart()); + if (Error E = Buf->commit()) + return createFileError(Filename, std::move(E)); + return Error::success(); +} + static Error handleArgs(const CommonConfig &Config, Object &Obj) { + for (StringRef Flag : Config.DumpSection) { + auto [SecName, FileName] = Flag.split("="); + if (Error E = dumpPartToFile(SecName, FileName, Config.InputFilename, Obj)) + return E; + } + // Extract all sections before any modifications. for (StringRef Flag : Config.ExtractSection) { StringRef SectionName; diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 53699ce0..f256e7b 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -837,7 +837,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, Version = Data.getU8(Cur); if (!Cur) break; - if (Version < 2 || Version > 3) + if (Version < 2 || Version > 4) return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + Twine(static_cast<int>(Version))); Feature = Data.getU8(Cur); // Feature byte @@ -852,6 +852,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, "callsite offsets feature is enabled: version = " + Twine(static_cast<int>(Version)) + " feature = " + Twine(static_cast<int>(Feature))); + if (FeatEnable.BBHash && Version < 4) + return createError("version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when " + "basic block hash feature is enabled: version = " + + Twine(static_cast<int>(Version)) + + " feature = " + Twine(static_cast<int>(Feature))); 
uint32_t NumBlocksInBBRange = 0; uint32_t NumBBRanges = 1; typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0; @@ -907,6 +912,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) + LastCallsiteEndOffset; uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr); + uint64_t Hash = FeatEnable.BBHash ? Data.getU64(Cur) : 0; Expected<BBAddrMap::BBEntry::Metadata> MetadataOrErr = BBAddrMap::BBEntry::Metadata::decode(MD); if (!MetadataOrErr) { @@ -914,7 +920,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, break; } BBEntries.push_back({ID, Offset + PrevBBEndOffset, Size, - *MetadataOrErr, CallsiteEndOffsets}); + *MetadataOrErr, CallsiteEndOffsets, Hash}); PrevBBEndOffset += Offset + Size; } TotalNumBlocks += BBEntries.size(); diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index faeeab3..8b75fbe 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1465,7 +1465,7 @@ void ELFState<ELFT>::writeSectionContent( for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) { // Write version and feature values. if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { - if (E.Version > 3) + if (E.Version > 4) WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: " << static_cast<int>(E.Version) << "; encoding using the most recent version"; @@ -1526,6 +1526,12 @@ void ELFState<ELFT>::writeSectionContent( } SHeader.sh_size += CBA.writeULEB128(BBE.Size); SHeader.sh_size += CBA.writeULEB128(BBE.Metadata); + if (FeatureOrErr->BBHash || BBE.Hash.has_value()) { + uint64_t Hash = + BBE.Hash.has_value() ? 
BBE.Hash.value() : llvm::yaml::Hex64(0); + CBA.write<uint64_t>(Hash, ELFT::Endianness); + SHeader.sh_size += 8; + } } } if (!PGOAnalyses) diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index d9cce1e..421d6603 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1887,6 +1887,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping( IO.mapRequired("Size", E.Size); IO.mapRequired("Metadata", E.Metadata); IO.mapOptional("CallsiteEndOffsets", E.CallsiteEndOffsets); + IO.mapOptional("Hash", E.Hash); } void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping( diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp index 804ff07..41f5187 100644 --- a/llvm/lib/Support/TextEncoding.cpp +++ b/llvm/lib/Support/TextEncoding.cpp @@ -54,9 +54,9 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) { return std::nullopt; } -LLVM_ATTRIBUTE_UNUSED static void -HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, - SmallVectorImpl<char> &Result) { +[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output, + size_t &OutputLength, + SmallVectorImpl<char> &Result) { // No space left in output buffer. Double the size of the underlying // memory in the SmallVectorImpl, adjust pointer and length and continue // the conversion. 
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp index 8d66348..6f8e091 100644 --- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp +++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp @@ -476,7 +476,7 @@ nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) { std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) + 1; - LLVM_ATTRIBUTE_UNUSED static std::size_t Rows = + [[maybe_unused]] static std::size_t Rows = UnicodeNameToCodepointLargestNameSize + 1; std::vector<char> Distances( diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2c3870c..636d4f8a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -8217,6 +8217,8 @@ bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) { Spec = AArch64::S_GOTPCREL; else if (Identifier == "plt") Spec = AArch64::S_PLT; + else if (Identifier == "funcinit") + Spec = AArch64::S_FUNCINIT; } if (Spec == AArch64::S_None) return Error(Loc, "invalid relocation specifier"); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index a388216..892b8da 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -232,6 +232,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup, } if (RefKind == AArch64::S_AUTH || RefKind == AArch64::S_AUTHADDR) return ELF::R_AARCH64_AUTH_ABS64; + if (RefKind == AArch64::S_FUNCINIT) + return ELF::R_AARCH64_FUNCINIT64; return ELF::R_AARCH64_ABS64; } case AArch64::fixup_aarch64_add_imm12: diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 2b5cf34..bc090c6 100644 --- 
a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -40,6 +40,7 @@ const MCAsmInfo::AtSpecifier ELFAtSpecifiers[] = { {AArch64::S_GOT, "GOT"}, {AArch64::S_GOTPCREL, "GOTPCREL"}, {AArch64::S_PLT, "PLT"}, + {AArch64::S_FUNCINIT, "FUNCINIT"}, }; const MCAsmInfo::AtSpecifier MachOAtSpecifiers[] = { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 0dfa61b..f2acff5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -164,6 +164,7 @@ enum { // ELF relocation specifiers in data directives: S_PLT = 0x400, S_GOTPCREL, + S_FUNCINIT, // Mach-O @ relocation specifiers: S_MACHO_GOT, diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index dd6fa16..d71f728 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -130,6 +130,12 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI) if (auto *CalledFunction = CB.getCalledFunction()) CalledFn = SMEAttrs(*CalledFunction, TLI); + // An `invoke` of an agnostic ZA function may not return normally (it may + // resume in an exception block). In this case, it acts like a private ZA + // callee and may require a ZA save to be set up before it is called. + if (isa<InvokeInst>(CB)) + CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); + // FIXME: We probably should not allow SME attributes on direct calls but // clang duplicates streaming mode attributes at each callsite. 
assert((IsIndirect || diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 80e985d..a2841c11 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18168,7 +18168,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { return CacheLineAlign; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static bool isCopyFromRegOfInlineAsm(const SDNode *N) { assert(N->getOpcode() == ISD::CopyFromReg); do { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a44a247..d516330 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -865,22 +865,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC_LO) { - if (AMDGPU::SReg_32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC_LO) { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } @@ -898,22 +892,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC) { - if (AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC) { // FIXME: Hack until VReg_1 removed. 
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp index 39e651d..8945ec3 100644 --- a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp +++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp @@ -166,7 +166,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, } // TODO -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index ca81d30..8ace2d2 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/MD5.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include <cstdint> #include <optional> using namespace llvm; @@ -193,7 +194,12 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) { dxbc::PSV::v2::ResourceBindInfo BindInfo; BindInfo.Type = Type; BindInfo.LowerBound = Binding.LowerBound; - BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1; + assert(Binding.Size == UINT32_MAX || + (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX && + "Resource range is too large"); + BindInfo.UpperBound = (Binding.Size == UINT32_MAX) + ? 
UINT32_MAX + : Binding.LowerBound + Binding.Size - 1; BindInfo.Space = Binding.Space; BindInfo.Kind = static_cast<dxbc::PSV::ResourceKind>(Kind); BindInfo.Flags = Flags; diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 974f653..5f180d6 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -667,11 +667,10 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable); } -LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily. - static DecodeStatus - DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const MCDisassembler *Decoder) { +[[maybe_unused]] // Suppress warning temporarily. +static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { static const MCPhysReg HvxVQRDecoderTable[] = { Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3, Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7}; diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index 5dde47a..a3296e0 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -419,8 +419,8 @@ namespace { using HCE = HexagonConstExtenders; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const OffsetRange &OR) { if (OR.Min > OR.Max) OS << '!'; OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align) @@ -435,8 +435,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, 
const PrintRegister &P) { if (P.Rs.Reg != 0) OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub); else @@ -451,8 +451,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintExpr &P) { OS << "## " << (P.Ex.Neg ? "- " : "+ "); if (P.Ex.Rs.Reg != 0) OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub); @@ -469,15 +469,15 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintInit &P) { OS << '[' << P.ExtI.first << ", " << PrintExpr(P.ExtI.second, P.HRI) << ']'; return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtDesc &ED) { assert(ED.OpNum != -1u); const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent(); const MachineFunction &MF = *MBB.getParent(); @@ -493,8 +493,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtRoot &ER) { switch (ER.Kind) { case MachineOperand::MO_Immediate: OS << "imm:" << ER.V.ImmVal; @@ -527,8 +527,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtValue &EV) { OS << HCE::ExtRoot(EV) << " off:" << EV.Offset; return OS; } @@ -540,8 +540,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintIMap &P) { OS << "{\n"; for (const std::pair<const 
HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) { OS << " " << PrintInit(Q.first, P.HRI) << " -> {"; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 4d96cfa..c7a4f68 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -789,7 +789,7 @@ struct ShuffleMask { } }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) { SM.print(OS); return OS; diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 87d052b..e4c0a16 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -364,7 +364,7 @@ private: const HexagonVectorCombine &HVC; }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; OS << "Addr: " << *AI.Addr << '\n'; @@ -375,7 +375,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no"); OS << ", IsHvx:" << (MG.IsHvx ? 
"yes" : "no") << '\n'; @@ -394,7 +394,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) { OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "; @@ -408,7 +408,7 @@ raw_ostream &operator<<(raw_ostream &OS, return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; for (const AlignVectors::ByteSpan::Block &B : BS) diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index fa8ae60..2ff5843 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -111,7 +111,7 @@ namespace { friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { const ChainOfDependences &CD = D.Chain; int ChainSize = CD.size(); @@ -144,7 +144,7 @@ namespace { bool isDefined() { return Inst2Replace != nullptr; } }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { OS << "** ReuseValue ***\n"; OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index ca98269..e3094b4 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -275,7 +275,7 @@ namespace HexagonII { INST_ICLASS_ALU32_3 = 0xf0000000 }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] static unsigned getMemAccessSizeInBytes(MemAccessSize S) { switch (S) { case ByteAccess: return 1; 
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 96ad5c6..0a8838c 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -156,13 +156,13 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() { return new RISCVVLOptimizer(); } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) { OI.print(OS); return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const std::optional<OperandInfo> &OI) { if (OI) diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index a0cf881..5a06ea3 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -24,6 +24,7 @@ class SystemZTargetMachine; namespace SystemZ { // Condition-code mask values. +const unsigned CCMASK_NONE = 0; const unsigned CCMASK_0 = 1 << 3; const unsigned CCMASK_1 = 1 << 2; const unsigned CCMASK_2 = 1 << 1; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b7d11a..de28faf 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -15,6 +15,7 @@ #include "SystemZConstantPoolValue.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -24,6 +25,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -1514,6 +1516,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { default: break; } + } else if (Constraint.size() == 5 && 
Constraint.starts_with("{")) { + if (StringRef("{@cc}").compare(Constraint) == 0) + return C_Other; } return TargetLowering::getConstraintType(Constraint); } @@ -1707,6 +1712,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, SystemZMC::VR128Regs, 32); } + if (Constraint[1] == '@') { + if (StringRef("{@cc}").compare(Constraint) == 0) + return std::make_pair(0u, &SystemZ::GR32BitRegClass); + } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } @@ -1737,6 +1746,38 @@ Register SystemZTargetLowering::getExceptionSelectorRegister( return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; } +// Convert condition code in CCReg to an i32 value. +static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { + SDLoc DL(CCReg); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +// Lower @cc targets via setcc. +SDValue SystemZTargetLowering::LowerAsmOutputForConstraint( + SDValue &Chain, SDValue &Glue, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { + if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0) + return SDValue(); + + // Check that return type is valid. 
+ if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() || + OpInfo.ConstraintVT.getSizeInBits() < 8) + report_fatal_error("Glue output operand is of invalid type"); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.addLiveIn(SystemZ::CC); + + if (Glue.getNode()) { + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue); + Chain = Glue.getValue(1); + } else + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32); + return getCCResult(DAG, Glue); +} + void SystemZTargetLowering::LowerAsmOperandForConstraint( SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { @@ -5300,14 +5341,6 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } -// Convert condition code in CCReg to an i32 value. -static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { - SDLoc DL(CCReg); - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); - return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); -} - SDValue SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { @@ -8723,95 +8756,247 @@ SDValue SystemZTargetLowering::combineSETCC( return SDValue(); } -static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { +static std::pair<SDValue, int> findCCUse(const SDValue &Val) { + switch (Val.getOpcode()) { + default: + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + case SystemZISD::IPM: + if (Val.getOperand(0).getOpcode() == SystemZISD::CLC || + Val.getOperand(0).getOpcode() == SystemZISD::STRCMP) + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP); + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY); + case SystemZISD::SELECT_CCMASK: { + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg.getOpcode() == SystemZISD::ICMP || + Op4CCReg.getOpcode() == SystemZISD::TM) { + auto [OpCC, 
OpCCValid] = findCCUse(Op4CCReg.getOperand(0)); + if (OpCC != SDValue()) + return std::make_pair(OpCC, OpCCValid); + } + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + if (!CCValid) + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + int CCValidVal = CCValid->getZExtValue(); + return std::make_pair(Op4CCReg, CCValidVal); + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0)); + if (Op0CC != SDValue()) + return std::make_pair(Op0CC, Op0CCValid); + return findCCUse(Val.getOperand(1)); + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG); + +SmallVector<SDValue, 4> static simplifyAssumingCCVal(SDValue &Val, SDValue &CC, + SelectionDAG &DAG) { + SDLoc DL(Val); + auto Opcode = Val.getOpcode(); + switch (Opcode) { + default: + return {}; + case ISD::Constant: + return {Val, Val, Val, Val}; + case SystemZISD::IPM: { + SDValue IPMOp0 = Val.getOperand(0); + if (IPMOp0 != CC) + return {}; + SmallVector<SDValue, 4> ShiftedCCVals; + for (auto CC : {0, 1, 2, 3}) + ShiftedCCVals.emplace_back( + DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32)); + return ShiftedCCVals; + } + case SystemZISD::SELECT_CCMASK: { + SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1); + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3)); + if (!CCValid || !CCMask) + return {}; + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG); + const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG); + if (TrueSDVals.empty() || FalseSDVals.empty()) + return {}; + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg != CC) + combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG); + if (Op4CCReg != CC) + return {}; + 
SmallVector<SDValue, 4> MergedSDVals; + for (auto &CCVal : {0, 1, 2, 3}) + MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0) + ? TrueSDVals[CCVal] + : FalseSDVals[CCVal]); + return MergedSDVals; + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SRA: + // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA + // would clobber CC). + if (!Val.hasOneUse()) + return {}; + [[fallthrough]]; + case ISD::SHL: + case ISD::SRL: + SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1); + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) + return {}; + SmallVector<SDValue, 4> BinaryOpSDVals; + for (auto CCVal : {0, 1, 2, 3}) + BinaryOpSDVals.emplace_back(DAG.getNode( + Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal])); + return BinaryOpSDVals; + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, - // If the CCReg instruction is itself a ICMP testing the condition + // If the CCReg instruction is itself a ICMP / TM testing the condition // code set by some other instruction, see whether we can directly // use that condition code. - - // Verify that we have an ICMP against some constant. - if (CCValid != SystemZ::CCMASK_ICMP) - return false; - auto *ICmp = CCReg.getNode(); - if (ICmp->getOpcode() != SystemZISD::ICMP) - return false; - auto *CompareLHS = ICmp->getOperand(0).getNode(); - auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1)); - if (!CompareRHS) + auto *CCNode = CCReg.getNode(); + if (!CCNode) return false; - // Optimize the case where CompareLHS is a SELECT_CCMASK. 
- if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { - // Verify that we have an appropriate mask for a EQ or NE comparison. - bool Invert = false; - if (CCMask == SystemZ::CCMASK_CMP_NE) - Invert = !Invert; - else if (CCMask != SystemZ::CCMASK_CMP_EQ) + if (CCNode->getOpcode() == SystemZISD::TM) { + if (CCValid != SystemZ::CCMASK_TM) return false; - - // Verify that the ICMP compares against one of select values. - auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0)); - if (!TrueVal) - return false; - auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!FalseVal) + auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) { + auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode()); + auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode()); + if (!Op0Node || !Op1Node) + return -1; + auto Op0APVal = Op0Node->getAPIntValue(); + auto Op1APVal = Op1Node->getAPIntValue(); + auto Result = Op0APVal & Op1APVal; + bool AllOnes = Result == Op1APVal; + bool AllZeros = Result == 0; + bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0; + return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1; + }; + SDValue Op0 = CCNode->getOperand(0); + SDValue Op1 = CCNode->getOperand(1); + auto [Op0CC, Op0CCValid] = findCCUse(Op0); + if (Op0CC == SDValue()) return false; - if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue()) - Invert = !Invert; - else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue()) + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - - // Compute the effective CC mask for the new branch or select. 
- auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2)); - auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3)); - if (!NewCCValid || !NewCCMask) - return false; - CCValid = NewCCValid->getZExtValue(); - CCMask = NewCCMask->getZExtValue(); - if (Invert) - CCMask ^= CCValid; - - // Return the updated CCReg link. - CCReg = CompareLHS->getOperand(4); + int NewCCMask = 0; + for (auto CC : {0, 1, 2, 3}) { + auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]); + if (CCVal < 0) + return false; + NewCCMask <<= 1; + NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0; + } + NewCCMask &= Op0CCValid; + CCReg = Op0CC; + CCMask = NewCCMask; + CCValid = Op0CCValid; return true; } + if (CCNode->getOpcode() != SystemZISD::ICMP || + CCValid != SystemZ::CCMASK_ICMP) + return false; - // Optimize the case where CompareRHS is (SRA (SHL (IPM))). - if (CompareLHS->getOpcode() == ISD::SRA) { - auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!SRACount || SRACount->getZExtValue() != 30) - return false; - auto *SHL = CompareLHS->getOperand(0).getNode(); - if (SHL->getOpcode() != ISD::SHL) - return false; - auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); - if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) - return false; - auto *IPM = SHL->getOperand(0).getNode(); - if (IPM->getOpcode() != SystemZISD::IPM) - return false; - - // Avoid introducing CC spills (because SRA would clobber CC). - if (!CompareLHS->hasOneUse()) - return false; - // Verify that the ICMP compares against zero. 
- if (CompareRHS->getZExtValue() != 0) + SDValue CmpOp0 = CCNode->getOperand(0); + SDValue CmpOp1 = CCNode->getOperand(1); + SDValue CmpOp2 = CCNode->getOperand(2); + auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0); + if (Op0CC != SDValue()) { + const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - // Compute the effective CC mask for the new branch or select. - CCMask = SystemZ::reverseCCMask(CCMask); - - // Return the updated CCReg link. - CCReg = IPM->getOperand(0); + auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2); + auto CmpTypeVal = CmpType->getZExtValue(); + const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val, + const SDValue &Op1Val) { + auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode()); + auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode()); + if (!Op0Node || !Op1Node) + return -1; + auto Op0APVal = Op0Node->getAPIntValue(); + auto Op1APVal = Op1Node->getAPIntValue(); + if (CmpTypeVal == SystemZICMP::SignedOnly) + return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2; + return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2; + }; + int NewCCMask = 0; + for (auto CC : {0, 1, 2, 3}) { + auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]); + if (CCVal < 0) + return false; + NewCCMask <<= 1; + NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0; + } + NewCCMask &= Op0CCValid; + CCMask = NewCCMask; + CCReg = Op0CC; + CCValid = Op0CCValid; return true; } return false; } -SDValue SystemZTargetLowering::combineBR_CCMASK( - SDNode *N, DAGCombinerInfo &DCI) const { +// Merging versus split in multiple branches cost. 
+TargetLoweringBase::CondMergingParams +SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc, + const Value *Lhs, + const Value *Rhs) const { + const auto isFlagOutOpCC = [](const Value *V) { + using namespace llvm::PatternMatch; + const Value *RHSVal; + const APInt *RHSC; + if (const auto *I = dyn_cast<Instruction>(V)) { + // PatternMatch.h provides concise tree-based pattern match of llvm IR. + if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) || + match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) { + if (const auto *CB = dyn_cast<CallBase>(RHSVal)) { + if (CB->isInlineAsm()) { + const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand()); + return IA && + IA->getConstraintString().find("{@cc}") != std::string::npos; + } + } + } + } + return false; + }; + // Pattern (ICmp %asm) or (ICmp (And %asm)). + // Cost of longest dependency chain (ICmp, And) is 2. CostThreshold or + // BaseCost can be set >=2. If cost of instruction <= CostThreshold + // conditionals will be merged or else conditionals will be split. + if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs)) + return {3, 0, -1}; + // Default. + return {-1, -1, -1}; +} + +SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N, + DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. 
@@ -8824,8 +9009,7 @@ SDValue SystemZTargetLowering::combineBR_CCMASK( int CCMaskVal = CCMask->getZExtValue(); SDValue Chain = N->getOperand(0); SDValue CCReg = N->getOperand(4); - - if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) + if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG)) return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), Chain, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), @@ -8848,16 +9032,80 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK( int CCMaskVal = CCMask->getZExtValue(); SDValue CCReg = N->getOperand(4); - if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) - return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), - N->getOperand(0), N->getOperand(1), - DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), - DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), - CCReg); + bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG); + + // Populate SDVals vector for each condition code ccval for given Val, which + // can again be another nested select_ccmask with the same CC. + const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) { + if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) { + SmallVector<SDValue, 4> Res; + if (Val.getOperand(4) != CCReg) + return SmallVector<SDValue, 4>{}; + SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1); + auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3)); + if (!CCMask) + return SmallVector<SDValue, 4>{}; + + int CCMaskVal = CCMask->getZExtValue(); + for (auto &CC : {0, 1, 2, 3}) + Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal + : FalseVal); + return Res; + } + return SmallVector<SDValue, 4>{Val, Val, Val, Val}; + }; + // Attempting to optimize TrueVal/FalseVal in outermost select_ccmask either + // with CCReg found by combineCCMask or original CCReg. 
+ SDValue TrueVal = N->getOperand(0); + SDValue FalseVal = N->getOperand(1); + auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG); + auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG); + // TrueSDVals/FalseSDVals might be empty in case of non-constant + // TrueVal/FalseVal for select_ccmask, which can not be optimized further. + if (TrueSDVals.empty()) + TrueSDVals = constructCCSDValsFromSELECT(TrueVal); + if (FalseSDVals.empty()) + FalseSDVals = constructCCSDValsFromSELECT(FalseVal); + if (!TrueSDVals.empty() && !FalseSDVals.empty()) { + SmallSet<SDValue, 4> MergedSDValsSet; + // Ignoring CC values outside CCValid. + for (auto CC : {0, 1, 2, 3}) { + if ((CCValidVal & ((1 << (3 - CC)))) != 0) + MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0) + ? TrueSDVals[CC] + : FalseSDVals[CC]); + } + if (MergedSDValsSet.size() == 1) + return *MergedSDValsSet.begin(); + if (MergedSDValsSet.size() == 2) { + auto BeginIt = MergedSDValsSet.begin(); + SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt); + if (NewTrueVal == FalseVal || NewFalseVal == TrueVal) + std::swap(NewTrueVal, NewFalseVal); + int NewCCMask = 0; + for (auto CC : {0, 1, 2, 3}) { + NewCCMask <<= 1; + NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0) + ?
(TrueSDVals[CC] == NewTrueVal) + : (FalseSDVals[CC] == NewTrueVal); + } + CCMaskVal = NewCCMask; + CCMaskVal &= CCValidVal; + TrueVal = NewTrueVal; + FalseVal = NewFalseVal; + IsCombinedCCReg = true; + } + } + + if (IsCombinedCCReg) + return DAG.getNode( + SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal, + FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg); + return SDValue(); } - SDValue SystemZTargetLowering::combineGET_CCMASK( SDNode *N, DAGCombinerInfo &DCI) const { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index f8706b7..d5b7603 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -533,6 +533,18 @@ public: } const char *getTargetNodeName(unsigned Opcode) const override; + + // This function currently returns cost for srl/ipm/cc sequence for merging. + CondMergingParams + getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, + const Value *Rhs) const override; + + // Handle Lowering flag assembly outputs. + SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, + const SDLoc &DL, + const AsmOperandInfo &Constraint, + SelectionDAG &DAG) const override; + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 47c24fc..f973949 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -601,6 +601,29 @@ static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineOperand Src = MI.getOperand(3); MachineOperand Len = MI.getOperand(4); + // If the length is a constant, we don't actually need the check. 
+ if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) { + if (Def->getOpcode() == WebAssembly::CONST_I32 || + Def->getOpcode() == WebAssembly::CONST_I64) { + if (Def->getOperand(1).getImm() == 0) { + // A zero-length memcpy is a no-op. + MI.eraseFromParent(); + return BB; + } + // A non-zero-length memcpy doesn't need a zero check. + unsigned MemoryCopy = + Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32; + BuildMI(*BB, MI, DL, TII.get(MemoryCopy)) + .add(DstMem) + .add(SrcMem) + .add(Dst) + .add(Src) + .add(Len); + MI.eraseFromParent(); + return BB; + } + } + // We're going to add an extra use to `Len` to test if it's zero; that // use shouldn't be a kill, even if the original use is. MachineOperand NoKillLen = Len; @@ -669,6 +692,28 @@ static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL, MachineOperand Val = MI.getOperand(2); MachineOperand Len = MI.getOperand(3); + // If the length is a constant, we don't actually need the check. + if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) { + if (Def->getOpcode() == WebAssembly::CONST_I32 || + Def->getOpcode() == WebAssembly::CONST_I64) { + if (Def->getOperand(1).getImm() == 0) { + // A zero-length memset is a no-op. + MI.eraseFromParent(); + return BB; + } + // A non-zero-length memset doesn't need a zero check. + unsigned MemoryFill = + Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32; + BuildMI(*BB, MI, DL, TII.get(MemoryFill)) + .add(Mem) + .add(Dst) + .add(Val) + .add(Len); + MI.eraseFromParent(); + return BB; + } + } + // We're going to add an extra use to `Len` to test if it's zero; that // use shouldn't be a kill, even if the original use is. 
MachineOperand NoKillLen = Len; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 28fa2cd..b81641f 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -414,6 +414,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); + // fp constants getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 3af8b3e..2bf016a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1335,9 +1335,8 @@ def ProcessorFeatures { !listconcat(ARLFeatures, ARLSAdditionalFeatures); // Pantherlake - list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI]; list<SubtargetFeature> PTLFeatures = - !listremove(!listconcat(ARLSFeatures, PTLAdditionalFeatures), [FeatureWIDEKL]); + !listremove(ARLSFeatures, [FeatureWIDEKL]); // Clearwaterforest diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index eea84a2..c32b1a6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3624,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc, match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) BaseCost += 1; + + // For OR conditions with EQ comparisons, prefer splitting into branches + // (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops, + // unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed + // comparisons (SLT, SGT) that can be optimized. 
+ if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or && + match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && + match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) + return {-1, -1, -1}; + return {BaseCost, BrMergingLikelyBias.getValue(), BrMergingUnlikelyBias.getValue()}; } @@ -3787,7 +3797,7 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { /// Return true if every element in Mask, is an in-place blend/select mask or is /// undef. -LLVM_ATTRIBUTE_UNUSED static bool isBlendOrUndef(ArrayRef<int> Mask) { +[[maybe_unused]] static bool isBlendOrUndef(ArrayRef<int> Mask) { unsigned NumElts = Mask.size(); for (auto [I, M] : enumerate(Mask)) if (!isUndefOrEqual(M, I) && !isUndefOrEqual(M, I + NumElts)) @@ -8096,7 +8106,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, return DstVec; } -LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) { +[[maybe_unused]] static bool isHorizOp(unsigned Opcode) { switch (Opcode) { case X86ISD::PACKSS: case X86ISD::PACKUS: @@ -20813,7 +20823,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // for DAG type consistency we have to match the FP operand type. APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); - LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; + [[maybe_unused]] APFloat::opStatus Status = APFloat::opOK; bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. @@ -22856,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, // be generated by the memcmp expansion pass with oversized integer compares // (see PR33325). 
bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); - if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) + if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero) return SDValue(); // Don't perform this combine if constructing the vector will be expensive. diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index edca7c1..1932a3a 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -175,7 +175,7 @@ constexpr FeatureBitset FeaturesArrowlakeS = FeaturesArrowlake | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = - (FeaturesArrowlakeS ^ FeatureWIDEKL) | FeaturePREFETCHI; + (FeaturesArrowlakeS ^ FeatureWIDEKL); constexpr FeatureBitset FeaturesClearwaterforest = (FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 6b67b48..09cb225 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2979,10 +2979,14 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op, "Op must be either i1 or vector of i1."); if (SI.getCondition()->getType() != Op->getType()) return nullptr; - if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) - return SelectInst::Create(Op, - IsAnd ? V : ConstantInt::getTrue(Op->getType()), - IsAnd ? ConstantInt::getFalse(Op->getType()) : V); + if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) { + Instruction *MDFrom = nullptr; + if (!ProfcheckDisableMetadataFixes) + MDFrom = &SI; + return SelectInst::Create( + Op, IsAnd ? V : ConstantInt::getTrue(Op->getType()), + IsAnd ? 
ConstantInt::getFalse(Op->getType()) : V, "", nullptr, MDFrom); + } return nullptr; } diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index ff5f390..66e45ec 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -266,8 +266,7 @@ void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI, if (!ProfcheckDisableMetadataFixes) BI->setMetadata(LLVMContext::MD_prof, SI->getMetadata(LLVMContext::MD_prof)); - DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, - {DominatorTree::Insert, StartBlock, NewBlock}}); + DTU->applyUpdates({{DominatorTree::Insert, StartBlock, NewBlock}}); } else { BasicBlock *EndBlock = SIUse->getParent(); BasicBlock *NewBlockT = BasicBlock::Create( @@ -1479,10 +1478,13 @@ bool DFAJumpThreading::run(Function &F) { DTU->flush(); #ifdef EXPENSIVE_CHECKS - assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)); verifyFunction(F, &dbgs()); #endif + if (MadeChanges && VerifyDomInfo) + assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full) && + "Failed to maintain validity of domtree!"); + return MadeChanges; } diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp index 213d0f3..1335665 100644 --- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -39,10 +39,11 @@ public: private: AliasAnalysis *AA; }; +} // namespace /// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function, /// iterating until no more changes are made. 
-bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { +static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { bool Changed = false; bool LocalChange = true; @@ -67,7 +68,6 @@ bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { } return Changed; } -} // namespace char FlattenCFGLegacyPass::ID = 0; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index b9d332b..578fec7 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -118,9 +118,13 @@ STATISTIC( STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); +namespace llvm { /// Disable running mem2reg during SROA in order to test or debug SROA. static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden); +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // namespace llvm + namespace { class AllocaSliceRewriter; @@ -1777,7 +1781,8 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, } Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, - LI.getName() + ".sroa.speculated"); + LI.getName() + ".sroa.speculated", + ProfcheckDisableMetadataFixes ? nullptr : &SI); LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI.replaceAllUsesWith(V); @@ -2662,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i) Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); - V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend"); + // No profiling support for vector selects. 
+ V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old, + DEBUG_TYPE, Name + "blend"); LLVM_DEBUG(dbgs() << " blend: " << *V << "\n"); return V; @@ -4360,10 +4367,13 @@ private: }; Value *Cond, *True, *False; + Instruction *MDFrom = nullptr; if (auto *SI = dyn_cast<SelectInst>(Sel)) { Cond = SI->getCondition(); True = SI->getTrueValue(); False = SI->getFalseValue(); + if (!ProfcheckDisableMetadataFixes) + MDFrom = SI; } else { Cond = Sel->getOperand(0); True = ConstantInt::get(Sel->getType(), 1); @@ -4383,8 +4393,12 @@ private: IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(), False->getName() + ".sroa.gep", NW); - Value *NSel = - IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel"); + Value *NSel = MDFrom + ? IRB.CreateSelect(Cond, NTrue, NFalse, + Sel->getName() + ".sroa.sel", MDFrom) + : IRB.CreateSelectWithUnknownProfile( + Cond, NTrue, NFalse, DEBUG_TYPE, + Sel->getName() + ".sroa.sel"); Visited.erase(&GEPI); GEPI.replaceAllUsesWith(NSel); GEPI.eraseFromParent(); diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 2ee91a9..0f3978f 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -47,6 +47,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include <cassert> #include <utility> @@ -321,7 +322,7 @@ class StructurizeCFG { void collectInfos(); - void insertConditions(bool Loops); + void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter); void simplifyConditions(); @@ -671,10 +672,9 @@ void StructurizeCFG::collectInfos() { } /// Insert the missing branch conditions -void StructurizeCFG::insertConditions(bool Loops) { +void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) { BranchVector &Conds = Loops ? 
LoopConds : Conditions; Value *Default = Loops ? BoolTrue : BoolFalse; - SSAUpdater PhiInserter; for (BranchInst *Term : Conds) { assert(Term->isConditional()); @@ -683,8 +683,9 @@ void StructurizeCFG::insertConditions(bool Loops) { BasicBlock *SuccTrue = Term->getSuccessor(0); BasicBlock *SuccFalse = Term->getSuccessor(1); - PhiInserter.Initialize(Boolean, ""); - PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); + unsigned Variable = PhiInserter.AddVariable("", Boolean); + PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent, + Default); BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; @@ -697,7 +698,7 @@ void StructurizeCFG::insertConditions(bool Loops) { ParentInfo = PI; break; } - PhiInserter.AddAvailableValue(BB, PI.Pred); + PhiInserter.AddAvailableValue(Variable, BB, PI.Pred); Dominator.addAndRememberBlock(BB); } @@ -706,9 +707,9 @@ void StructurizeCFG::insertConditions(bool Loops) { CondBranchWeights::setMetadata(*Term, ParentInfo.Weights); } else { if (!Dominator.resultIsRememberedBlock()) - PhiInserter.AddAvailableValue(Dominator.result(), Default); + PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default); - Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); + PhiInserter.AddUse(Variable, &Term->getOperandUse(0)); } } } @@ -1414,8 +1415,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT, orderNodes(); collectInfos(); createFlow(); - insertConditions(false); - insertConditions(true); + + SSAUpdaterBulk PhiInserter; + insertConditions(false, PhiInserter); + insertConditions(true, PhiInserter); + PhiInserter.RewriteAndOptimizeAllUses(*DT); + setPhiValues(); simplifyHoistedPhis(); simplifyConditions(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8ca3bed..23f5623 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -24,12 +24,9 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H -#include "VPlanAnalysis.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" @@ -41,10 +38,11 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InstructionCost.h" -#include <algorithm> #include <cassert> #include <cstddef> +#include <functional> #include <string> +#include <utility> namespace llvm { @@ -346,13 +344,6 @@ public: /// Return the cost of the block. virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; - /// Return true if it is legal to hoist instructions into this block. - bool isLegalToHoistInto() { - // There are currently no constraints that prevent an instruction to be - // hoisted into a VPBlockBase. - return true; - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printAsOperand(raw_ostream &OS, bool PrintType = false) const { OS << getName(); |