diff options
author | Marina Taylor <marina_taylor@apple.com> | 2024-11-29 18:28:39 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-29 18:28:39 +0000 |
commit | 8fb748b4a7c45b56c2e4f37c327fd88958ab3758 (patch) | |
tree | 2ae00587b2e8092d2bc69be52a102a729a7730e3 /llvm/lib/Analysis/InlineCost.cpp | |
parent | fe042904829b83a61c1f4bc904f8f9e5b6da891e (diff) | |
download | llvm-8fb748b4a7c45b56c2e4f37c327fd88958ab3758.zip llvm-8fb748b4a7c45b56c2e4f37c327fd88958ab3758.tar.gz llvm-8fb748b4a7c45b56c2e4f37c327fd88958ab3758.tar.bz2 |
[Inliner] Don't count a call penalty for foldable __memcpy_chk and similar (#117876)
When the size is an appropriate constant, __memcpy_chk will turn into a
memcpy that gets folded away by InstCombine. Therefore this patch avoids
counting these as calls for purposes of inlining costs.
This is only really relevant on platforms whose headers redirect memcpy
to __memcpy_chk (such as Darwin). On platforms that use intrinsics,
memcpy and similar functions are already exempt from call penalties.
Diffstat (limited to 'llvm/lib/Analysis/InlineCost.cpp')
-rw-r--r-- | llvm/lib/Analysis/InlineCost.cpp | 86 |
1 files changed, 69 insertions, 17 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 22bb406c..32acf23 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -249,6 +249,9 @@ protected: /// Getter for BlockFrequencyInfo function_ref<BlockFrequencyInfo &(Function &)> GetBFI; + /// Getter for TargetLibraryInfo + function_ref<const TargetLibraryInfo &(Function &)> GetTLI; + /// Profile summary information. ProfileSummaryInfo *PSI; @@ -433,6 +436,7 @@ protected: bool simplifyIntrinsicCallIsConstant(CallBase &CB); bool simplifyIntrinsicCallObjectSize(CallBase &CB); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + bool isLoweredToCall(Function *F, CallBase &Call); /// Return true if the given argument to the function being considered for /// inlining has the given attribute set either at the call site or the @@ -492,13 +496,15 @@ protected: bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, - function_ref<AssumptionCache &(Function &)> GetAssumptionCache, - function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, - ProfileSummaryInfo *PSI = nullptr, - OptimizationRemarkEmitter *ORE = nullptr) + CallAnalyzer( + Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), - PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE), + GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE), CandidateCall(Call) {} InlineResult analyze(); @@ -688,7 +694,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { /// FIXME: if InlineCostCallAnalyzer is derived from, this may need /// to instantiate the derived class. InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, - GetAssumptionCache, GetBFI, PSI, ORE, false); + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, + false); if (CA.analyze().isSuccess()) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. @@ -1106,10 +1113,12 @@ public: const TargetTransformInfo &TTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr, ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true, bool IgnoreThreshold = false) - : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE), + : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI, + ORE), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE || isCostBenefitAnalysisEnabled()), @@ -1228,8 +1237,8 @@ private: InlineConstants::IndirectCallThreshold; InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, - GetAssumptionCache, GetBFI, PSI, ORE, false, - true); + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, + false, true); if (CA.analyze().isSuccess()) { increment(InlineCostFeatureIndex::nested_inline_cost_estimate, CA.getCost()); @@ -1355,9 +1364,11 @@ public: const TargetTransformInfo &TTI, function_ref<AssumptionCache &(Function &)> &GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, CallBase &Call) - : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {} + : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, + PSI) {} const InlineCostFeatures &features() const { return Cost; } }; @@ -2260,6 +2271,44 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) { return false; } +bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) { + const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr; + LibFunc LF; + if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF)) + return TTI.isLoweredToCall(F); + + switch (LF) { + case LibFunc_memcpy_chk: + case LibFunc_memmove_chk: + case LibFunc_mempcpy_chk: + case LibFunc_memset_chk: { + // Calls to __memcpy_chk whose length is known to fit within the object + // size will eventually be replaced by inline stores. Therefore, these + // should not incur a call penalty. This is only really relevant on + // platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as + // other platforms use memcpy intrinsics, which are already exempt from the + // call penalty. + auto *LenOp = dyn_cast<ConstantInt>(Call.getOperand(2)); + if (!LenOp) + LenOp = dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(Call.getOperand(2))); + auto *ObjSizeOp = dyn_cast<ConstantInt>(Call.getOperand(3)); + if (!ObjSizeOp) + ObjSizeOp = dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(Call.getOperand(3))); + if (LenOp && ObjSizeOp && + LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) { + return false; + } + break; + } + default: + break; + } + + return TTI.isLoweredToCall(F); +} + bool CallAnalyzer::visitCallBase(CallBase &Call) { if (!onCallBaseVisitStart(Call)) return true; @@ -2341,7 +2390,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { return false; } - if (TTI.isLoweredToCall(F)) { + if (isLoweredToCall(F, Call)) { onLoweredCall(F, Call, IsIndirectCall); } @@ -2945,6 +2994,7 @@ std::optional<int> llvm::getInliningCostEstimate( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { const InlineParams Params = {/* DefaultThreshold*/ 0, /*HintThreshold*/ {}, @@ -2958,7 +3008,7 @@ std::optional<int> llvm::getInliningCostEstimate( /*EnableDeferral*/ true}; InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE, true, + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true, /*IgnoreThreshold*/ true); auto R = CA.analyze(); if (!R.isSuccess()) @@ -2970,9 +3020,10 @@ std::optional<InlineCostFeatures> llvm::getInliningCostFeatures( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { - InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, - ORE, *Call.getCalledFunction(), Call); + InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI, + PSI, ORE, *Call.getCalledFunction(), Call); auto R = CFA.analyze(); if (!R.isSuccess()) return std::nullopt; @@ -3072,7 +3123,7 @@ InlineCost llvm::getInlineCost( << ")\n"); InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE); + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); @@ -3263,7 +3314,8 @@ InlineCostAnnotationPrinterPass::run(Function &F, continue; OptimizationRemarkEmitter ORE(CalledFunction); InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI, - GetAssumptionCache, nullptr, &PSI, &ORE); + GetAssumptionCache, nullptr, nullptr, &PSI, + &ORE); ICCA.analyze(); OS << " Analyzing call of " << CalledFunction->getName() << "... (caller:" << CB->getCaller()->getName() << ")\n"; |