From 820cc01d1e65f7be7c3c27bcdcb6b8c13f4ec2e6 Mon Sep 17 00:00:00 2001 From: Eugene Leviant Date: Fri, 5 Jul 2019 12:00:10 +0000 Subject: [ThinLTO] Attempt to recommit r365040 after caching fix It's possible that some function can load and store the same variable using the same constant expression: store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**) %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**) The bitcast expression was mistakenly cached while processing loads, and never examined later when processing store. This caused @bar to be mistakenly treated as read-only variable. See load-store-caching.ll. llvm-svn: 365188 --- llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 122 ++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 27 deletions(-) (limited to 'llvm/lib/Analysis/ModuleSummaryAnalysis.cpp') diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 914561f..e25eb29 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -231,6 +231,13 @@ static bool isNonVolatileLoad(const Instruction *I) { return false; } +static bool isNonVolatileStore(const Instruction *I) { + if (const auto *SI = dyn_cast(I)) + return !SI->isVolatile(); + + return false; +} + static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, const Function &F, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, @@ -245,7 +252,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // Map from callee ValueId to profile count. Used to accumulate profile // counts for all static calls to a given callee. MapVector CallGraphEdges; - SetVector RefEdges; + SetVector RefEdges, LoadRefEdges, StoreRefEdges; SetVector TypeTests; SetVector TypeTestAssumeVCalls, TypeCheckedLoadVCalls; @@ -258,6 +265,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // list. findRefEdges(Index, &F, RefEdges, Visited); std::vector NonVolatileLoads; + std::vector NonVolatileStores; bool HasInlineAsmMaybeReferencingInternal = false; for (const BasicBlock &BB : F) @@ -265,12 +273,34 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, if (isa(I)) continue; ++NumInsts; - if (isNonVolatileLoad(&I)) { - // Postpone processing of non-volatile load instructions - // See comments below - Visited.insert(&I); - NonVolatileLoads.push_back(&I); - continue; + // Regular LTO module doesn't participate in ThinLTO import, + // so no reference from it can be read/writeonly, since this + // would require importing variable as local copy + if (IsThinLTO) { + if (isNonVolatileLoad(&I)) { + // Postpone processing of non-volatile load instructions + // See comments below + Visited.insert(&I); + NonVolatileLoads.push_back(&I); + continue; + } else if (isNonVolatileStore(&I)) { + Visited.insert(&I); + NonVolatileStores.push_back(&I); + // All references from second operand of store (destination address) + // can be considered write-only if they're not referenced by any + // non-store instruction. References from first operand of store + // (stored value) can't be treated either as read- or as write-only + // so we add them to RefEdges as we do with all other instructions + // except non-volatile load. + Value *Stored = I.getOperand(0); + if (auto *GV = dyn_cast(Stored)) + // findRefEdges will try to examine GV operands, so instead + // of calling it we should add GV to RefEdges directly. + RefEdges.insert(Index.getOrInsertValueInfo(GV)); + else if (auto *U = dyn_cast(Stored)) + findRefEdges(Index, U, RefEdges, Visited); + continue; + } } findRefEdges(Index, &I, RefEdges, Visited); auto CS = ImmutableCallSite(&I); @@ -361,24 +391,61 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } } - // By now we processed all instructions in a function, except - // non-volatile loads. All new refs we add in a loop below - // are obviously constant. All constant refs are grouped in the - // end of RefEdges vector, so we can use a single integer value - // to identify them. - unsigned RefCnt = RefEdges.size(); - for (const Instruction *I : NonVolatileLoads) { - Visited.erase(I); - findRefEdges(Index, I, RefEdges, Visited); - } - std::vector Refs = RefEdges.takeVector(); - // Regular LTO module doesn't participate in ThinLTO import, - // so no reference from it can be readonly, since this would - // require importing variable as local copy - if (IsThinLTO) - for (; RefCnt < Refs.size(); ++RefCnt) + std::vector Refs; + if (IsThinLTO) { + auto AddRefEdges = [&](const std::vector &Instrs, + SetVector &Edges, + SmallPtrSet &Cache) { + for (const auto *I : Instrs) { + Cache.erase(I); + findRefEdges(Index, I, Edges, Cache); + } + }; + + // By now we processed all instructions in a function, except + // non-volatile loads and non-volatile value stores. Let's find + // ref edges for both of instruction sets + AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited); + // We can add some values to the Visited set when processing load + // instructions which are also used by stores in NonVolatileStores. + // For example this can happen if we have following code: + // + // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**) + // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**) + // + // After processing loads we'll add bitcast to the Visited set, and if + // we use the same set while processing stores, we'll never see store + // to @bar and @bar will be mistakenly treated as readonly. + SmallPtrSet StoreCache; + AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache); + + // If both load and store instruction reference the same variable + // we won't be able to optimize it. Add all such reference edges + // to RefEdges set. + for (auto &VI : StoreRefEdges) + if (LoadRefEdges.remove(VI)) + RefEdges.insert(VI); + + unsigned RefCnt = RefEdges.size(); + // All new reference edges inserted in two loops below are either + // read or write only. They will be grouped in the end of RefEdges + // vector, so we can use a single integer value to identify them. + for (auto &VI : LoadRefEdges) + RefEdges.insert(VI); + + unsigned FirstWORef = RefEdges.size(); + for (auto &VI : StoreRefEdges) + RefEdges.insert(VI); + + Refs = RefEdges.takeVector(); + for (; RefCnt < FirstWORef; ++RefCnt) Refs[RefCnt].setReadOnly(); + for (; RefCnt < Refs.size(); ++RefCnt) + Refs[RefCnt].setWriteOnly(); + } else { + Refs = RefEdges.takeVector(); + } // Explicit add hot edges to enforce importing for designated GUIDs for // sample PGO, to enable the same inlines as the profiled optimized binary. for (auto &I : F.getImportGUIDs()) @@ -526,10 +593,11 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, } } - // Don't mark variables we won't be able to internalize as read-only. - GlobalVarSummary::GVarFlags VarFlags( + // Don't mark variables we won't be able to internalize as read/write-only. + bool CanBeInternalized = !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && - !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass()); + !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); + GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized); auto GVarSummary = llvm::make_unique(Flags, VarFlags, RefEdges.takeVector()); if (NonRenamableLocal) @@ -647,7 +715,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( } else { std::unique_ptr Summary = llvm::make_unique( - GVFlags, GlobalVarSummary::GVarFlags(), + GVFlags, GlobalVarSummary::GVarFlags(false, false), ArrayRef{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); } -- cgit v1.1