Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/Analysis/MemoryProfileInfo.h            |   8
-rw-r--r--  llvm/lib/Analysis/MemoryProfileInfo.cpp                   |  28
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp  |   9
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp           |  16
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp                   |   9
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h              |  16
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp            | 122
-rw-r--r--  llvm/test/ThinLTO/X86/memprof-basic.ll                    |   5
-rw-r--r--  llvm/test/Transforms/PGOProfile/memprof.ll                |  19
-rw-r--r--  llvm/tools/llvm-c-test/debuginfo.c                        |   2
-rw-r--r--  llvm/unittests/Analysis/MemoryProfileInfoTest.cpp         |  21
11 files changed, 104 insertions, 151 deletions
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index 571caf9..be690a4 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -59,6 +59,14 @@ LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type);
 /// True if the AllocTypes bitmask contains just a single type.
 LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes);
 
+/// Removes any existing "ambiguous" memprof attribute. Called before we apply a
+/// specific allocation type such as "cold", "notcold", or "hot".
+LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB);
+
+/// Adds an "ambiguous" memprof attribute to call with a matched allocation
+/// profile but that we haven't yet been able to disambiguate.
+LLVM_ABI void addAmbiguousAttribute(CallBase *CB);
+
 /// Class to build a trie of call stack contexts for a particular profiled
 /// allocation call, along with their associated allocation types.
 /// The allocation will be at the root of the trie, which is then used to
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 0c1f8db..92a5b6f 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -54,6 +54,10 @@ cl::opt<unsigned> MinPercentMaxColdSize(
     "memprof-min-percent-max-cold-size", cl::init(100), cl::Hidden,
     cl::desc("Min percent of max cold bytes for critical cold context"));
 
+LLVM_ABI cl::opt<bool> MemProfUseAmbiguousAttributes(
+    "memprof-ambiguous-attributes", cl::init(true), cl::Hidden,
+    cl::desc("Apply ambiguous memprof attribute to ambiguous allocations"));
+
 } // end namespace llvm
 
 bool llvm::memprof::metadataIncludesAllContextSizeInfo() {
@@ -125,6 +129,26 @@ bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
   return NumAllocTypes == 1;
 }
 
+void llvm::memprof::removeAnyExistingAmbiguousAttribute(CallBase *CB) {
+  if (!CB->hasFnAttr("memprof"))
+    return;
+  assert(CB->getFnAttr("memprof").getValueAsString() == "ambiguous");
+  CB->removeFnAttr("memprof");
+}
+
+void llvm::memprof::addAmbiguousAttribute(CallBase *CB) {
+  if (!MemProfUseAmbiguousAttributes)
+    return;
+  // We may have an existing ambiguous attribute if we are reanalyzing
+  // after inlining.
+  if (CB->hasFnAttr("memprof")) {
+    assert(CB->getFnAttr("memprof").getValueAsString() == "ambiguous");
+  } else {
+    auto A = llvm::Attribute::get(CB->getContext(), "memprof", "ambiguous");
+    CB->addFnAttr(A);
+  }
+}
+
 void CallStackTrie::addCallStack(
     AllocationType AllocType, ArrayRef<uint64_t> StackIds,
     std::vector<ContextTotalSize> ContextSizeInfo) {
@@ -470,6 +494,9 @@ void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
                                                 StringRef Descriptor) {
   auto AllocTypeString = getAllocTypeAttributeString(AT);
   auto A = llvm::Attribute::get(CI->getContext(), "memprof", AllocTypeString);
+  // After inlining we may be able to convert an existing ambiguous allocation
+  // to an unambiguous one.
+  removeAnyExistingAmbiguousAttribute(CI);
   CI->addFnAttr(A);
   if (MemProfReportHintedSizes) {
     std::vector<ContextTotalSize> ContextSizeInfo;
@@ -529,6 +556,7 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
     assert(MIBCallStack.size() == 1 &&
           "Should only be left with Alloc's location in stack");
     CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
+    addAmbiguousAttribute(CI);
     return true;
   }
   // If there exists corner case that CallStackTrie has one chain to leaf
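The two helpers above define a small life cycle for the string attribute "memprof"="ambiguous": matching marks every profiled-but-unresolved allocation, and later disambiguation replaces the marker with a concrete hint. A minimal sketch of that flow, using only the CallBase attribute API from the hunks above (the wrapper function and its name are illustrative, not part of the patch):

```cpp
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Hypothetical driver showing the intended attribute life cycle.
void annotateAlloc(CallBase *CB, bool Resolved) {
  if (!Resolved) {
    // Matching attached multi-MIB !memprof metadata; mark the call so later
    // passes can see it was profiled but not yet disambiguated. This is a
    // no-op under -memprof-ambiguous-attributes=false.
    memprof::addAmbiguousAttribute(CB);
    return;
  }
  // Once a single allocation type is known (e.g. after inlining), drop the
  // placeholder before attaching the real hint.
  memprof::removeAnyExistingAmbiguousAttribute(CB);
  CB->addFnAttr(Attribute::get(CB->getContext(), "memprof", "cold"));
}
```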
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index faeab95..cfdfd94 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -3986,6 +3986,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
 void ModuleCallsiteContextGraph::updateAllocationCall(
     CallInfo &Call, AllocationType AllocType) {
   std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
+  removeAnyExistingAmbiguousAttribute(cast<CallBase>(Call.call()));
   auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
                                 "memprof", AllocTypeString);
   cast<CallBase>(Call.call())->addFnAttr(A);
@@ -5661,9 +5662,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
           auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
 
           // Include allocs that were already assigned a memprof function
-          // attribute in the statistics.
-          if (CB->getAttributes().hasFnAttr("memprof")) {
-            assert(!MemProfMD);
+          // attribute in the statistics. Only do this for those that do not have
+          // memprof metadata, since we add an "ambiguous" memprof attribute by
+          // default.
+          if (CB->getAttributes().hasFnAttr("memprof") && !MemProfMD) {
            CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
                 ? AllocTypeColdThinBackend++
                 : AllocTypeNotColdThinBackend++;
@@ -5740,6 +5742,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
               // clone J-1 (J==0 is the original clone and does not have a VMaps
               // entry).
               CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
+              removeAnyExistingAmbiguousAttribute(CBClone);
               CBClone->addFnAttr(A);
               ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)
                        << ore::NV("AllocationCall", CBClone) << " in clone "
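One consequence of the change above: a "memprof" function attribute no longer implies a resolved allocation type, so consumers must check the attribute's value rather than its mere presence. A sketch of the distinction (the predicate is illustrative, not part of the patch):

```cpp
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// "ambiguous" now means matched by the profile but unresolved, so only a
// concrete value such as "cold" counts as a usable hint.
static bool hasResolvedColdHint(const CallBase &CB) {
  if (!CB.hasFnAttr("memprof"))
    return false; // never matched by the profile
  return CB.getFnAttr("memprof").getValueAsString() == "cold";
}
```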
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cb6bfb2..56a3d6d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3903,8 +3903,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
     if (VF.isScalar())
       continue;
 
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
     precomputeCosts(*Plan, VF, CostCtx);
     auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4161,8 +4160,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
 
     // Add on other costs that are modelled in VPlan, but not in the legacy
     // cost model.
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind);
     VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
     assert(VectorRegion && "Expected to have a vector region!");
     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6854,7 +6852,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
 
 InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
                                                ElementCount VF) const {
-  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
+  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind);
   InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
 
   // Now compute and add the VPlan-based cost.
@@ -7087,8 +7085,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
   // simplifications not accounted for in the legacy cost model. If that's the
   // case, don't trigger the assertion, as the extra simplifications may cause a
   // different VF to be picked by the VPlan-based cost model.
-  VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
-                        *CM.PSE.getSE());
+  VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind);
   precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
   // Verify that the VPlan-based and legacy cost models agree, except for VPlans
   // with early exits and plans with additional VPlan simplifications. The
@@ -8624,8 +8621,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // TODO: Enable following transform when the EVL-version of extended-reduction
   // and mulacc-reduction are implemented.
   if (!CM.foldTailWithEVL()) {
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
     VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
                              CostCtx, Range);
   }
@@ -10079,7 +10075,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   bool ForceVectorization =
       Hints.getForce() == LoopVectorizeHints::FK_Enabled;
   VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
-                        CM.CostKind, *CM.PSE.getSE());
+                        CM.CostKind);
   if (!ForceVectorization &&
       !isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
                                    LVP.getPlanFor(VF.Width), SEL,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 2555ebe..07b191a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1772,8 +1772,7 @@ VPCostContext::getOperandInfo(VPValue *V) const {
 }
 
 InstructionCost VPCostContext::getScalarizationOverhead(
-    Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
-    bool AlwaysIncludeReplicatingR) {
+    Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF) {
   if (VF.isScalar())
     return 0;
 
@@ -1793,11 +1792,7 @@ InstructionCost VPCostContext::getScalarizationOverhead(
   SmallPtrSet<const VPValue *, 4> UniqueOperands;
   SmallVector<Type *> Tys;
   for (auto *Op : Operands) {
-    if (Op->isLiveIn() ||
-        (!AlwaysIncludeReplicatingR &&
-         isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op)) ||
-        (isa<VPReplicateRecipe>(Op) &&
-         cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
+    if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
         !UniqueOperands.insert(Op).second)
       continue;
     Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 1580a3b..fc1a09e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -349,14 +349,12 @@ struct VPCostContext {
   LoopVectorizationCostModel &CM;
   SmallPtrSet<Instruction *, 8> SkipCostComputation;
   TargetTransformInfo::TargetCostKind CostKind;
-  ScalarEvolution &SE;
 
   VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
                 const VPlan &Plan, LoopVectorizationCostModel &CM,
-                TargetTransformInfo::TargetCostKind CostKind,
-                ScalarEvolution &SE)
+                TargetTransformInfo::TargetCostKind CostKind)
       : TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
-        CostKind(CostKind), SE(SE) {}
+        CostKind(CostKind) {}
 
   /// Return the cost for \p UI with \p VF using the legacy cost model as
   /// fallback until computing the cost of all recipes migrates to VPlan.
@@ -376,12 +374,10 @@ struct VPCostContext {
 
   /// Estimate the overhead of scalarizing a recipe with result type \p ResultTy
   /// and \p Operands with \p VF. This is a convenience wrapper for the
-  /// type-based getScalarizationOverhead API. If \p AlwaysIncludeReplicatingR
-  /// is true, always compute the cost of scalarizing replicating operands.
-  InstructionCost
-  getScalarizationOverhead(Type *ResultTy, ArrayRef<const VPValue *> Operands,
-                           ElementCount VF,
-                           bool AlwaysIncludeReplicatingR = false);
+  /// type-based getScalarizationOverhead API.
+  InstructionCost getScalarizationOverhead(Type *ResultTy,
+                                           ArrayRef<const VPValue *> Operands,
+                                           ElementCount VF);
 };
 
 /// This class can be used to assign names to VPValues. For VPValues without
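The three vectorizer files above make the same mechanical change: VPCostContext no longer carries a ScalarEvolution reference, so every construction site drops the *PSE.getSE() argument. A stripped-down sketch of the resulting shape (VPCostContextLike is a stand-in, not the real struct):

```cpp
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Stand-in illustrating the trimmed constructor; the real struct also holds
// TLI, VPlan type analysis, the LLVMContext, and the legacy cost model.
struct VPCostContextLike {
  const TargetTransformInfo &TTI;
  TargetTransformInfo::TargetCostKind CostKind;
  // No ScalarEvolution member any more; address costs are queried with a
  // null SCEV instead (see the VPlanRecipes.cpp hunk below).
  VPCostContextLike(const TargetTransformInfo &TTI,
                    TargetTransformInfo::TargetCostKind CostKind)
      : TTI(TTI), CostKind(CostKind) {}
};
```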
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 43d61f2..67b9244 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -40,7 +40,6 @@
 #include <cassert>
 
 using namespace llvm;
-using namespace llvm::VPlanPatternMatch;
 
 using VectorParts = SmallVector<Value *, 2>;
 
@@ -304,6 +303,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
   VPRecipeBase *OpR = Op->getDefiningRecipe();
 
   // If the partial reduction is predicated, a select will be operand 0
+  using namespace llvm::VPlanPatternMatch;
   if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
     OpR = Op->getDefiningRecipe();
   }
@@ -1963,6 +1963,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
   Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 
   VPValue *Op0, *Op1;
+  using namespace llvm::VPlanPatternMatch;
   if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
       (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
        match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
@@ -3110,62 +3111,6 @@ bool VPReplicateRecipe::shouldPack() const {
   });
 }
 
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
-  auto *PtrR = Ptr->getDefiningRecipe();
-  if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
-                  cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
-                      Instruction::GetElementPtr) ||
-                 isa<VPWidenGEPRecipe>(PtrR) ||
-                 match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
-    return false;
-
-  // We are looking for a GEP where all indices are either loop invariant or
-  // inductions.
-  for (VPValue *Opd : drop_begin(PtrR->operands())) {
-    if (!Opd->isDefinedOutsideLoopRegions() &&
-        !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
-      return false;
-  }
-
-  return true;
-}
-
-/// Returns true if \p V is used as part of the address of another load or
-/// store.
-static bool isUsedByLoadStoreAddress(const VPUser *V) {
-  SmallPtrSet<const VPUser *, 4> Seen;
-  SmallVector<const VPUser *> WorkList = {V};
-
-  while (!WorkList.empty()) {
-    auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
-    if (!Cur || !Seen.insert(Cur).second)
-      continue;
-
-    for (VPUser *U : Cur->users()) {
-      if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))
-        if (InterleaveR->getAddr() == Cur)
-          return true;
-      if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {
-        if (RepR->getOpcode() == Instruction::Load &&
-            RepR->getOperand(0) == Cur)
-          return true;
-        if (RepR->getOpcode() == Instruction::Store &&
-            RepR->getOperand(1) == Cur)
-          return true;
-      }
-      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
-        if (MemR->getAddr() == Cur && MemR->isConsecutive())
-          return true;
-      }
-    }
-
-    append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
-  }
-  return false;
-}
-
 InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
   Instruction *UI = cast<Instruction>(getUnderlyingValue());
@@ -3273,58 +3218,21 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
   }
   case Instruction::Load:
   case Instruction::Store: {
-    if (VF.isScalable() && !isSingleScalar())
-      return InstructionCost::getInvalid();
-
+    if (isSingleScalar()) {
+      bool IsLoad = UI->getOpcode() == Instruction::Load;
+      Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
+      Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
+      const Align Alignment = getLoadStoreAlignment(UI);
+      unsigned AS = getLoadStoreAddressSpace(UI);
+      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
+      InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
+          UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
+      return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
+                                   ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
+    }
     // TODO: See getMemInstScalarizationCost for how to handle replicating and
     // predicated cases.
-    const VPRegionBlock *ParentRegion = getParent()->getParent();
-    if (ParentRegion && ParentRegion->isReplicator())
-      break;
-
-    bool IsLoad = UI->getOpcode() == Instruction::Load;
-    const VPValue *PtrOp = getOperand(!IsLoad);
-    // TODO: Handle cases where we need to pass a SCEV to
-    // getAddressComputationCost.
-    if (shouldUseAddressAccessSCEV(PtrOp))
-      break;
-
-    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
-    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
-    const Align Alignment = getLoadStoreAlignment(UI);
-    unsigned AS = getLoadStoreAddressSpace(UI);
-    TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
-    InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
-        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
-
-    Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
-    InstructionCost ScalarCost =
-        ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
-                              PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
-    if (isSingleScalar())
-      return ScalarCost;
-
-    SmallVector<const VPValue *> OpsToScalarize;
-    Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
-    // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
-    // don't assign scalarization overhead in general, if the target prefers
-    // vectorized addressing or the loaded value is used as part of an address
-    // of another load or store.
-    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
-    if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
-      bool EfficientVectorLoadStore =
-          Ctx.TTI.supportsEfficientVectorElementLoadStore();
-      if (!(IsLoad && !PreferVectorizedAddressing) &&
-          !(!IsLoad && EfficientVectorLoadStore))
-        append_range(OpsToScalarize, operands());
-
-      if (!EfficientVectorLoadStore)
-        ResultTy = Ctx.Types.inferScalarType(this);
-    }
-
-    return (ScalarCost * VF.getFixedValue()) +
-           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
+    break;
   }
 }
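For the single-scalar case that survives above, the cost is a plain scalar memory op plus a SCEV-free address computation. Here are the same two TTI queries in standalone form (a sketch only; the recipe code operates on VPlan types rather than a raw LoadInst):

```cpp
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Cost of one scalar load, mirroring the new isSingleScalar() path: memory-op
// cost plus address-computation cost with no SCEV pointer argument.
InstructionCost scalarLoadCost(const TargetTransformInfo &TTI, LoadInst *LI,
                               TTI::TargetCostKind CostKind) {
  InstructionCost MemCost =
      TTI.getMemoryOpCost(Instruction::Load, LI->getType(), LI->getAlign(),
                          LI->getPointerAddressSpace(), CostKind);
  return MemCost + TTI.getAddressComputationCost(
                       LI->getPointerOperandType(), nullptr, nullptr, CostKind);
}
```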
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index 0ff0ce0..537e1b8 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -103,7 +103,9 @@ declare i32 @sleep()
 
 define internal ptr @_Z3barv() #0 !dbg !15 {
 entry:
-  %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
+  ;; Use an ambiguous attribute for this allocation, which is now added to such
+  ;; allocations during matching. It should not affect cloning.
+  %call = call ptr @_Znam(i64 0) #1, !memprof !2, !callsite !7
   ret ptr null
 }
 
@@ -125,6 +127,7 @@ entry:
 uselistorder ptr @_Z3foov, { 1, 0 }
 
 attributes #0 = { noinline optnone }
+attributes #1 = { "memprof"="ambiguous" }
 
 !llvm.dbg.cu = !{!13}
 !llvm.module.flags = !{!20, !21}
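The test above expresses the annotation in textual IR; the equivalent construction through the C++ API looks roughly like this (function names mirror the test, the helper itself is hypothetical):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Builds: %call = call ptr @_Znam(i64 0) #1  with #1 = { "memprof"="ambiguous" }
void emitAmbiguousAlloc(Module &M) {
  LLVMContext &Ctx = M.getContext();
  auto *FT = FunctionType::get(PointerType::getUnqual(Ctx), false);
  Function *F = Function::Create(FT, Function::InternalLinkage, "_Z3barv", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  FunctionCallee Znam = M.getOrInsertFunction(
      "_Znam", PointerType::getUnqual(Ctx), B.getInt64Ty());
  CallInst *Call = B.CreateCall(Znam, B.getInt64(0), "call");
  Call->addFnAttr(Attribute::get(Ctx, "memprof", "ambiguous"));
  B.CreateRet(Constant::getNullValue(PointerType::getUnqual(Ctx)));
}
```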
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index c69d031..f6a89a8 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -38,7 +38,7 @@
 ; ALL-NOT: no profile data available for function
 
 ;; Using a memprof-only profile for memprof-use should only give memprof metadata
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-print-match-info -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY,MEMPROFMATCHINFO,MEMPROFSTATS
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-print-match-info -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY,MEMPROFMATCHINFO,MEMPROFSTATS,AMBIG
 
 ; There should not be any PGO metadata
 ; MEMPROFONLY-NOT: !prof
@@ -51,10 +51,10 @@
 ;; Test the same thing but by passing the memory profile through to a default
 ;; pipeline via -memory-profile-file=, which should cause the necessary field
 ;; of the PGOOptions structure to be populated with the profile filename.
-; RUN: opt < %s -passes='default<O2>' -memory-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
+; RUN: opt < %s -passes='default<O2>' -memory-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY,AMBIG
 
 ;; Using a pgo+memprof profile for memprof-use should only give memprof metadata
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY,AMBIG
 
 ;; Using a pgo-only profile for memprof-use should give an error
 ; RUN: not opt < %s -passes='memprof-use<profile-filename=%t.pgoprofdata>' -S 2>&1 | FileCheck %s --check-prefixes=MEMPROFWITHPGOONLY
@@ -72,7 +72,7 @@
 
 ;; Using a pgo+memprof profile for both memprof-use and pgo-instr-use should
 ;; give both memprof and pgo metadata.
-; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
+; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO,AMBIG
 
 ;; Check that the total sizes are reported if requested. A message should be
 ;; emitted for the pruned context. Also check that remarks are emitted for the
@@ -108,7 +108,11 @@
 
 ;; However, with the same threshold, but hot hints not enabled, it should be
 ;; notcold again.
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,AMBIG
+
+;; Test that we don't get an ambiguous memprof attribute when
+;; -memprof-ambiguous-attributes is disabled.
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-ambiguous-attributes=false 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,NOAMBIG
 
 ; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
@@ -140,7 +144,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; PGO: !prof
 define dso_local noundef ptr @_Z3foov() #0 !dbg !10 {
 entry:
-  ; MEMPROF: call {{.*}} @_Znam{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
+  ; MEMPROF: call {{.*}} @_Znam{{.*}} #[[A0:[0-9]+]]{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
   ; MEMPROFNOCOLINFO: call {{.*}} @_Znam{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
   %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !13
   ret ptr %call, !dbg !14
@@ -364,6 +368,9 @@ for.end:                                          ; preds = %for.cond
   ret i32 0, !dbg !103
 }
 
+;; We optionally apply an ambiguous memprof attribute to ambiguous allocations
+; AMBIG: #[[A0]] = { builtin allocsize(0) "memprof"="ambiguous" }
+; NOAMBIG: #[[A0]] = { builtin allocsize(0) }
 ; MEMPROF: #[[A1]] = { builtin allocsize(0) "memprof"="notcold" }
 ; MEMPROF: #[[A2]] = { builtin allocsize(0) "memprof"="cold" }
 ; MEMPROF: ![[M1]] = !{![[MIB1:[0-9]+]], ![[MIB2:[0-9]+]], ![[MIB3:[0-9]+]], ![[MIB4:[0-9]+]]}
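The new RUN line exercises -memprof-ambiguous-attributes=false from the command line; in-process code can flip the same cl::opt directly, the way the unit tests below save and restore MemProfKeepAllNotColdContexts. A sketch (it assumes an extern declaration for the option is visible to the caller):

```cpp
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Support/CommandLine.h"

namespace llvm {
extern cl::opt<bool> MemProfUseAmbiguousAttributes; // defined in MemoryProfileInfo.cpp
} // namespace llvm
using namespace llvm;

// Temporarily disable ambiguous-attribute marking, then restore the flag.
void withAmbiguousAttrsDisabled(CallBase *CB) {
  bool Orig = MemProfUseAmbiguousAttributes;
  MemProfUseAmbiguousAttributes = false; // like -memprof-ambiguous-attributes=false
  memprof::addAmbiguousAttribute(CB);    // now a no-op
  MemProfUseAmbiguousAttributes = Orig;
}
```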
diff --git a/llvm/tools/llvm-c-test/debuginfo.c b/llvm/tools/llvm-c-test/debuginfo.c
index e376d82..a2f4b3e 100644
--- a/llvm/tools/llvm-c-test/debuginfo.c
+++ b/llvm/tools/llvm-c-test/debuginfo.c
@@ -328,8 +328,10 @@ int llvm_test_dibuilder(void) {
   // Test that LLVMGetFirstDbgRecord and LLVMGetLastDbgRecord return NULL for
   // instructions without debug info.
   LLVMDbgRecordRef Phi1FirstDbgRecord = LLVMGetFirstDbgRecord(Phi1);
+  (void)Phi1FirstDbgRecord;
   assert(Phi1FirstDbgRecord == NULL);
   LLVMDbgRecordRef Phi1LastDbgRecord = LLVMGetLastDbgRecord(Phi1);
+  (void)Phi1LastDbgRecord;
   assert(Phi1LastDbgRecord == NULL);
 
   // Insert a non-phi before the `ret` but not before the debug records to
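The two added casts above follow the usual pattern for variables consumed only by assert(): in release (NDEBUG) builds the assert compiles away and the variable would otherwise trigger set-but-unused warnings. The same idiom in miniature:

```cpp
#include <cassert>

int compute(); // stand-in for any call whose result is only checked in asserts

void demo() {
  int Result = compute();
  (void)Result; // keeps -Wunused-variable quiet when NDEBUG removes the assert
  assert(Result == 0);
}
```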
diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
index d8457a3..d1c0f64 100644
--- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
+++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
@@ -230,7 +230,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
   CallBase *Call = findCall(*Func, "call");
   Trie.buildAndAttachMIBMetadata(Call);
 
-  EXPECT_FALSE(Call->hasFnAttr("memprof"));
+  EXPECT_TRUE(Call->hasFnAttr("memprof"));
+  EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
   EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
   MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
   ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
@@ -279,7 +280,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
   CallBase *Call = findCall(*Func, "call");
   Trie.buildAndAttachMIBMetadata(Call);
 
-  EXPECT_FALSE(Call->hasFnAttr("memprof"));
+  EXPECT_TRUE(Call->hasFnAttr("memprof"));
+  EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
   EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
   MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
   ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
@@ -333,7 +335,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
   CallBase *Call = findCall(*Func, "call");
   Trie.buildAndAttachMIBMetadata(Call);
 
-  EXPECT_FALSE(Call->hasFnAttr("memprof"));
+  EXPECT_TRUE(Call->hasFnAttr("memprof"));
+  EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
   EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
   MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
   ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
@@ -392,7 +395,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
   CallBase *Call = findCall(*Func, "call");
   Trie.buildAndAttachMIBMetadata(Call);
 
-  EXPECT_FALSE(Call->hasFnAttr("memprof"));
+  EXPECT_TRUE(Call->hasFnAttr("memprof"));
+  EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
   EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
   MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
   ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
@@ -463,7 +467,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
   ASSERT_NE(Call, nullptr);
   Trie.buildAndAttachMIBMetadata(Call);
 
-  EXPECT_FALSE(Call->hasFnAttr("memprof"));
+  EXPECT_TRUE(Call->hasFnAttr("memprof"));
+  EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
   EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
   MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
   EXPECT_THAT(MemProfMD, MemprofMetadataEquals(ExpectedVals));
@@ -536,7 +541,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
   // Restore original option value.
   MemProfKeepAllNotColdContexts = OrigMemProfKeepAllNotColdContexts;
 
-  EXPECT_FALSE(Call->hasFnAttr("memprof"));
+  EXPECT_TRUE(Call->hasFnAttr("memprof"));
+  EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
   EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
   MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
   EXPECT_THAT(MemProfMD, MemprofMetadataEquals(ExpectedVals));
@@ -664,7 +670,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
 
   // The hot allocations will be converted to NotCold and pruned as they
   // are unnecessary to determine how to clone the cold allocation.
-  EXPECT_FALSE(Call->hasFnAttr("memprof"));
+  EXPECT_TRUE(Call->hasFnAttr("memprof"));
+  EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
   EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
   MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
   ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
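Taken together, the expectations above reduce to a single pattern, shown here as a standalone gtest-style helper (hypothetical; it simply combines the checks the hunks repeat):

```cpp
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "gtest/gtest.h"
using namespace llvm;

// Checks the marking left by buildAndAttachMIBMetadata on a still-ambiguous
// allocation: the attribute is gated by the flag, the metadata is not.
void checkAmbiguousMarking(CallBase *Call, bool FlagEnabled) {
  if (FlagEnabled) {
    EXPECT_TRUE(Call->hasFnAttr("memprof"));
    EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
  } else {
    EXPECT_FALSE(Call->hasFnAttr("memprof"));
  }
  EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
}
```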