diff options
29 files changed, 1197 insertions, 11 deletions
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 5d7c747..da0e412 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -145,6 +145,9 @@ public: const OutlinedHashTree *getOutlinedHashTree() { return PublishedHashTree.get(); } + const StableFunctionMap *getStableFunctionMap() { + return PublishedStableFunctionMap.get(); + } /// Returns true if we should write codegen data. bool emitCGData() { return EmitCGData; } @@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() { return CodeGenData::getInstance().hasOutlinedHashTree(); } +inline bool hasStableFunctionMap() { + return CodeGenData::getInstance().hasStableFunctionMap(); +} + inline const OutlinedHashTree *getOutlinedHashTree() { return CodeGenData::getInstance().getOutlinedHashTree(); } +inline const StableFunctionMap *getStableFunctionMap() { + return CodeGenData::getInstance().getStableFunctionMap(); +} + inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); } inline void diff --git a/llvm/include/llvm/CGData/StableFunctionMap.h b/llvm/include/llvm/CGData/StableFunctionMap.h index a2e1abb..8881adf 100644 --- a/llvm/include/llvm/CGData/StableFunctionMap.h +++ b/llvm/include/llvm/CGData/StableFunctionMap.h @@ -110,7 +110,7 @@ struct StableFunctionMap { size_t size(SizeType Type = UniqueHashCount) const; /// Finalize the stable function map by trimming content. - void finalize(); + void finalize(bool SkipTrim = false); private: /// Insert a `StableFunctionEntry` into the function map directly. 
This diff --git a/llvm/include/llvm/CGData/StableFunctionMapRecord.h b/llvm/include/llvm/CGData/StableFunctionMapRecord.h index 0517f2c..f9b3ed7 100644 --- a/llvm/include/llvm/CGData/StableFunctionMapRecord.h +++ b/llvm/include/llvm/CGData/StableFunctionMapRecord.h @@ -49,7 +49,7 @@ struct StableFunctionMapRecord { void deserializeYAML(yaml::Input &YIS); /// Finalize the stable function map by trimming content. - void finalize() { FunctionMap->finalize(); } + void finalize(bool SkipTrim = false) { FunctionMap->finalize(SkipTrim); } /// Merge the stable function map into this one. void merge(const StableFunctionMapRecord &Other) { diff --git a/llvm/include/llvm/CodeGen/GlobalMergeFunctions.h b/llvm/include/llvm/CodeGen/GlobalMergeFunctions.h new file mode 100644 index 0000000..82239e0 --- /dev/null +++ b/llvm/include/llvm/CodeGen/GlobalMergeFunctions.h @@ -0,0 +1,85 @@ +//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass defines the implementation of a function merging mechanism +// that utilizes a stable function hash to track differences in constants and +// identify potential merge candidates. The process involves two rounds: +// 1. The first round collects stable function hashes and identifies merge +// candidates with matching hashes. It also computes the set of parameters +// that point to different constants during the stable function merge. +// 2. The second round leverages this collected global function information to +// optimistically create a merged function in each module context, ensuring +// correct transformation. 
+// Similar to the global outliner, this approach uses the linker's deduplication +// (ICF) to fold identical merged functions, thereby reducing the final binary +// size. The work is inspired by the concepts discussed in the following paper: +// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H +#define LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H + +#include "llvm/CGData/StableFunctionMap.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +enum class HashFunctionMode { + Local, + BuildingHashFuncion, + UsingHashFunction, +}; + +namespace llvm { + +// A vector of locations (the pair of (instruction, operand) indices) reachable +// from a parameter. +using ParamLocs = SmallVector<IndexPair, 4>; +// A vector of parameters +using ParamLocsVecTy = SmallVector<ParamLocs, 8>; + +/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism +/// using stable function hashes. It identifies and merges functions with +/// matching hashes across modules to optimize binary size. +class GlobalMergeFunc { + HashFunctionMode MergerMode = HashFunctionMode::Local; + + std::unique_ptr<StableFunctionMap> LocalFunctionMap; + + const ModuleSummaryIndex *Index; + +public: + /// The suffix used to identify the merged function that parameterizes + /// the constant values. Note that the original function, without this suffix, + /// becomes a thunk supplying contexts to the merged function via parameters. + static constexpr const char MergingInstanceSuffix[] = ".Tgm"; + + GlobalMergeFunc(const ModuleSummaryIndex *Index) : Index(Index) {}; + + void initializeMergerMode(const Module &M); + + bool run(Module &M); + + /// Analyze module to create stable function into LocalFunctionMap. + void analyze(Module &M); + + /// Emit LocalFunctionMap into __llvm_merge section. 
+ void emitFunctionMap(Module &M); + + /// Merge functions in the module using the given function map. + bool merge(Module &M, const StableFunctionMap *FunctionMap); +}; + +/// Global function merging pass for new pass manager. +struct GlobalMergeFuncPass : public PassInfoMixin<GlobalMergeFuncPass> { + PreservedAnalyses run(Module &M, AnalysisManager<Module> &); +}; + +} // end namespace llvm +#endif // LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 72054ab..708ff46 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -507,6 +507,9 @@ namespace llvm { /// This pass frees the memory occupied by the MachineFunction. FunctionPass *createFreeMachineFunctionPass(); + /// This pass performs merging similar functions globally. + ModulePass *createGlobalMergeFuncPass(); + /// This pass performs outlining on machine instructions directly before /// printing assembly. ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index bf934de..0e16627 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &); void initializeGCMachineCodeAnalysisPass(PassRegistry &); void initializeGCModuleInfoPass(PassRegistry &); void initializeGVNLegacyPassPass(PassRegistry &); +void initializeGlobalMergeFuncPassWrapperPass(PassRegistry &); void initializeGlobalMergePass(PassRegistry &); void initializeGlobalsAAWrapperPassPass(PassRegistry &); void initializeHardwareLoopsLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 8d89cc8..54245ca 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -79,6 +79,7 @@ struct ForcePassLinking { 
(void)llvm::createDomOnlyViewerWrapperPassPass(); (void)llvm::createDomViewerWrapperPassPass(); (void)llvm::createAlwaysInlinerLegacyPass(); + (void)llvm::createGlobalMergeFuncPass(); (void)llvm::createGlobalsAAWrapperPass(); (void)llvm::createInstSimplifyLegacyPass(); (void)llvm::createInstructionCombiningPass(); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 9e95625..3f7d226 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -35,6 +35,7 @@ #include "llvm/CodeGen/FinalizeISel.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GlobalMerge.h" +#include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/CodeGen/IndirectBrExpand.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" @@ -713,6 +714,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addIRPasses( // Convert conditional moves to conditional jumps when profitable. 
if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize) addPass(SelectOptimizePass(&TM)); + + if (Opt.EnableGlobalMergeFunc) + addPass(GlobalMergeFuncPass()); } /// Turn exception handling constructs into something the code generators can diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 9d12a12..3ceb5ca 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -29,6 +29,7 @@ MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass()) MODULE_PASS("lower-emutls", LowerEmuTLSPass()) MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass()) MODULE_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass()) +MODULE_PASS("global-merge-func", GlobalMergeFuncPass()) #undef MODULE_PASS #ifndef FUNCTION_ANALYSIS diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index 8ab6d63..29bdb9c 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -31,6 +31,7 @@ struct CGPassBuilderOption { bool DisableVerify = false; bool EnableImplicitNullChecks = false; bool EnableBlockPlacementStats = false; + bool EnableGlobalMergeFunc = false; bool EnableMachineFunctionSplitter = false; bool MISchedPostRA = false; bool EarlyLiveIntervals = false; diff --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp index cfef5b2..fe7be0c 100644 --- a/llvm/lib/CGData/StableFunctionMap.cpp +++ b/llvm/lib/CGData/StableFunctionMap.cpp @@ -14,11 +14,43 @@ //===----------------------------------------------------------------------===// #include "llvm/CGData/StableFunctionMap.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "stable-function-map" using namespace llvm; +static cl::opt<unsigned> + GlobalMergingMinMerges("global-merging-min-merges", + 
cl::desc("Minimum number of similar functions with " + "the same hash required for merging."), + cl::init(2), cl::Hidden); +static cl::opt<unsigned> GlobalMergingMinInstrs( + "global-merging-min-instrs", + cl::desc("The minimum instruction count required when merging functions."), + cl::init(1), cl::Hidden); +static cl::opt<unsigned> GlobalMergingMaxParams( + "global-merging-max-params", + cl::desc( + "The maximum number of parameters allowed when merging functions."), + cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden); +static cl::opt<unsigned> GlobalMergingParamOverhead( + "global-merging-param-overhead", + cl::desc("The overhead cost associated with each parameter when merging " + "functions."), + cl::init(2), cl::Hidden); +static cl::opt<unsigned> + GlobalMergingCallOverhead("global-merging-call-overhead", + cl::desc("The overhead cost associated with each " + "function call when merging functions."), + cl::init(1), cl::Hidden); +static cl::opt<unsigned> GlobalMergingExtraThreshold( + "global-merging-extra-threshold", + cl::desc("An additional cost threshold that must be exceeded for merging " + "to be considered beneficial."), + cl::init(0), cl::Hidden); + unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) { auto It = NameToId.find(Name); if (It != NameToId.end()) @@ -117,7 +149,38 @@ static void removeIdenticalIndexPair( SF->IndexOperandHashMap->erase(Pair); } -void StableFunctionMap::finalize() { +static bool isProfitable( + const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> + &SFS) { + unsigned StableFunctionCount = SFS.size(); + if (StableFunctionCount < GlobalMergingMinMerges) + return false; + + unsigned InstCount = SFS[0]->InstCount; + if (InstCount < GlobalMergingMinInstrs) + return false; + + unsigned ParamCount = SFS[0]->IndexOperandHashMap->size(); + if (ParamCount > GlobalMergingMaxParams) + return false; + + unsigned Benefit = InstCount * (StableFunctionCount - 1); + unsigned Cost = + 
(GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) * + StableFunctionCount + + GlobalMergingExtraThreshold; + + bool Result = Benefit > Cost; + LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", " + << "StableFunctionCount = " << StableFunctionCount + << ", InstCount = " << InstCount + << ", ParamCount = " << ParamCount + << ", Benefit = " << Benefit << ", Cost = " << Cost + << ", Result = " << (Result ? "true" : "false") << "\n"); + return Result; +} + +void StableFunctionMap::finalize(bool SkipTrim) { for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) { auto &[StableHash, SFS] = *It; @@ -158,9 +221,15 @@ void StableFunctionMap::finalize() { continue; } + if (SkipTrim) + continue; + // Trim the index pair that has the same operand hash across // stable functions. removeIdenticalIndexPair(SFS); + + if (!isProfitable(SFS)) + HashToFuncs.erase(It); } Finalized = true; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 5a17944..5f0496a 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -71,6 +71,7 @@ add_llvm_component_library(LLVMCodeGen GCMetadataPrinter.cpp GCRootLowering.cpp GlobalMerge.cpp + GlobalMergeFunctions.cpp HardwareLoops.cpp IfConversion.cpp ImplicitNullChecks.cpp diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp new file mode 100644 index 0000000..2b367ca --- /dev/null +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -0,0 +1,672 @@ +//===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements the global merge function pass. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalMergeFunctions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/CGData/CodeGenData.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/StructuralHash.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#define DEBUG_TYPE "global-merge-func" + +using namespace llvm; +using namespace llvm::support; + +static cl::opt<bool> DisableCGDataForMerging( + "disable-cgdata-for-merging", cl::Hidden, + cl::desc("Disable codegen data for function merging. Local " + "merging is still enabled within a module."), + cl::init(false)); + +STATISTIC(NumMismatchedFunctionHash, + "Number of mismatched function hash for global merge function"); +STATISTIC(NumMismatchedInstCount, + "Number of mismatched instruction count for global merge function"); +STATISTIC(NumMismatchedConstHash, + "Number of mismatched const hash for global merge function"); +STATISTIC(NumMismatchedModuleId, + "Number of mismatched Module Id for global merge function"); +STATISTIC(NumMergedFunctions, + "Number of functions that are actually merged using function hash"); +STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed"); +STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed"); +STATISTIC(NumEligibleFunctions, "Number of functions that are eligible"); + +/// Returns true if the \p OpIdx operand of \p CI is the callee operand. +static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) { + return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx); +} + +static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) { + if (CI->isInlineAsm()) + return false; + Function *Callee = CI->getCalledOperand() + ? 
dyn_cast_or_null<Function>( + CI->getCalledOperand()->stripPointerCasts()) + : nullptr; + if (Callee) { + if (Callee->isIntrinsic()) + return false; + auto Name = Callee->getName(); + // objc_msgSend stubs must be called, and can't have their address taken. + if (Name.starts_with("objc_msgSend$")) + return false; + // Calls to dtrace probes must generate unique patchpoints. + if (Name.starts_with("__dtrace")) + return false; + } + if (isCalleeOperand(CI, OpIdx) && + CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) { + // The operand is the callee and it has already been signed. Ignore this + // because we cannot add another ptrauth bundle to the call instruction. + return false; + } + return true; +} + +/// Returns true if function \p F is eligible for merging. +bool isEligibleFunction(Function *F) { + if (F->isDeclaration()) + return false; + + if (F->hasFnAttribute(llvm::Attribute::NoMerge) || + F->hasFnAttribute(llvm::Attribute::AlwaysInline)) + return false; + + if (F->hasAvailableExternallyLinkage()) + return false; + + if (F->getFunctionType()->isVarArg()) + return false; + + if (F->getCallingConv() == CallingConv::SwiftTail) + return false; + + // If function contains callsites with musttail, if we merge + // it, the merged function will have the musttail callsite, but + // the number of parameters can change, thus the parameter count + // of the callsite will mismatch with the function itself. 
+ for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + const auto *CB = dyn_cast<CallBase>(&I); + if (CB && CB->isMustTailCall()) + return false; + } + } + + return true; +} + +static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + case Instruction::Call: + case Instruction::Invoke: + return true; + default: + return false; + } +} + +static bool ignoreOp(const Instruction *I, unsigned OpIdx) { + assert(OpIdx < I->getNumOperands() && "Invalid operand index"); + + if (!isEligibleInstrunctionForConstantSharing(I)) + return false; + + if (!isa<Constant>(I->getOperand(OpIdx))) + return false; + + if (const auto *CI = dyn_cast<CallBase>(I)) + return canParameterizeCallOperand(CI, OpIdx); + + return true; +} + +static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { + Type *SrcTy = V->getType(); + if (SrcTy->isStructTy()) { + assert(DestTy->isStructTy()); + assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements()); + Value *Result = PoisonValue::get(DestTy); + for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) { + Value *Element = + createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)), + DestTy->getStructElementType(I)); + + Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I)); + } + return Result; + } + assert(!DestTy->isStructTy()); + if (auto *SrcAT = dyn_cast<ArrayType>(SrcTy)) { + auto *DestAT = dyn_cast<ArrayType>(DestTy); + assert(DestAT); + assert(SrcAT->getNumElements() == DestAT->getNumElements()); + Value *Result = UndefValue::get(DestTy); + for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) { + Value *Element = + createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)), + DestAT->getElementType()); + + Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I)); + } + return Result; + } + assert(!DestTy->isArrayTy()); + if 
(SrcTy->isIntegerTy() && DestTy->isPointerTy()) + return Builder.CreateIntToPtr(V, DestTy); + if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) + return Builder.CreatePtrToInt(V, DestTy); + return Builder.CreateBitCast(V, DestTy); +} + +void GlobalMergeFunc::analyze(Module &M) { + ++NumAnalyzedModues; + for (Function &Func : M) { + ++NumAnalyzedFunctions; + if (isEligibleFunction(&Func)) { + ++NumEligibleFunctions; + + auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp); + + // Convert the operand map to a vector for a serialization-friendly + // format. + IndexOperandHashVecType IndexOperandHashes; + for (auto &Pair : *FI.IndexOperandHashMap) + IndexOperandHashes.emplace_back(Pair); + + StableFunction SF(FI.FunctionHash, get_stable_name(Func.getName()).str(), + M.getModuleIdentifier(), FI.IndexInstruction->size(), + std::move(IndexOperandHashes)); + + LocalFunctionMap->insert(SF); + } + } +} + +/// Tuple to hold function info to process merging. +struct FuncMergeInfo { + StableFunctionMap::StableFunctionEntry *SF; + Function *F; + std::unique_ptr<IndexInstrMap> IndexInstruction; + FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F, + std::unique_ptr<IndexInstrMap> IndexInstruction) + : SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {} +}; + +// Given the func info, and the parameterized locations, create and return +// a new merged function by replacing the original constants with the new +// parameters. +static Function *createMergedFunction(FuncMergeInfo &FI, + ArrayRef<Type *> ConstParamTypes, + const ParamLocsVecTy &ParamLocsVec) { + // Synthesize a new merged function name by appending ".Tgm" to the root + // function's name. 
+ auto *MergedFunc = FI.F; + std::string NewFunctionName = + MergedFunc->getName().str() + GlobalMergeFunc::MergingInstanceSuffix; + auto *M = MergedFunc->getParent(); + assert(!M->getFunction(NewFunctionName)); + + FunctionType *OrigTy = MergedFunc->getFunctionType(); + // Get the original params' types. + SmallVector<Type *> ParamTypes(OrigTy->param_begin(), OrigTy->param_end()); + // Append const parameter types that are passed in. + ParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end()); + FunctionType *FuncType = FunctionType::get(OrigTy->getReturnType(), + ParamTypes, /*isVarArg=*/false); + + // Declare a new function + Function *NewFunction = + Function::Create(FuncType, MergedFunc->getLinkage(), NewFunctionName); + if (auto *SP = MergedFunc->getSubprogram()) + NewFunction->setSubprogram(SP); + NewFunction->copyAttributesFrom(MergedFunc); + NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass); + + NewFunction->setLinkage(GlobalValue::InternalLinkage); + NewFunction->addFnAttr(Attribute::NoInline); + + // Add the new function before the root function. + M->getFunctionList().insert(MergedFunc->getIterator(), NewFunction); + + // Move the body of MergedFunc into the NewFunction. + NewFunction->splice(NewFunction->begin(), MergedFunc); + + // Update the original args by the new args. + auto NewArgIter = NewFunction->arg_begin(); + for (Argument &OrigArg : MergedFunc->args()) { + Argument &NewArg = *NewArgIter++; + OrigArg.replaceAllUsesWith(&NewArg); + } + + // Replace the original Constants by the new args. 
+ unsigned NumOrigArgs = MergedFunc->arg_size(); + for (unsigned ParamIdx = 0; ParamIdx < ParamLocsVec.size(); ++ParamIdx) { + Argument *NewArg = NewFunction->getArg(NumOrigArgs + ParamIdx); + for (auto [InstIndex, OpndIndex] : ParamLocsVec[ParamIdx]) { + auto *Inst = FI.IndexInstruction->lookup(InstIndex); + auto *OrigC = Inst->getOperand(OpndIndex); + if (OrigC->getType() != NewArg->getType()) { + IRBuilder<> Builder(Inst->getParent(), Inst->getIterator()); + Inst->setOperand(OpndIndex, + createCast(Builder, NewArg, OrigC->getType())); + } else { + Inst->setOperand(OpndIndex, NewArg); + } + } + } + + return NewFunction; +} + +// Given the original function (Thunk) and the merged function (ToFunc), create +// a thunk to the merged function. +static void createThunk(FuncMergeInfo &FI, ArrayRef<Constant *> Params, + Function *ToFunc) { + auto *Thunk = FI.F; + + assert(Thunk->arg_size() + Params.size() == + ToFunc->getFunctionType()->getNumParams()); + Thunk->dropAllReferences(); + + BasicBlock *BB = BasicBlock::Create(Thunk->getContext(), "", Thunk); + IRBuilder<> Builder(BB); + + SmallVector<Value *> Args; + unsigned ParamIdx = 0; + FunctionType *ToFuncTy = ToFunc->getFunctionType(); + + // Add arguments which are passed through Thunk. + for (Argument &AI : Thunk->args()) { + Args.push_back(createCast(Builder, &AI, ToFuncTy->getParamType(ParamIdx))); + ++ParamIdx; + } + + // Add new arguments defined by Params. + for (auto *Param : Params) { + assert(ParamIdx < ToFuncTy->getNumParams()); + Args.push_back( + createCast(Builder, Param, ToFuncTy->getParamType(ParamIdx))); + ++ParamIdx; + } + + CallInst *CI = Builder.CreateCall(ToFunc, Args); + bool isSwiftTailCall = ToFunc->getCallingConv() == CallingConv::SwiftTail && + Thunk->getCallingConv() == CallingConv::SwiftTail; + CI->setTailCallKind(isSwiftTailCall ? 
llvm::CallInst::TCK_MustTail + : llvm::CallInst::TCK_Tail); + CI->setCallingConv(ToFunc->getCallingConv()); + CI->setAttributes(ToFunc->getAttributes()); + if (Thunk->getReturnType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(createCast(Builder, CI, Thunk->getReturnType())); +} + +// Check if the old merged/optimized IndexOperandHashMap is compatible with +// the current IndexOperandHashMap. An operand hash may not be stable across +// different builds due to varying modules combined. To address this, we relax +// the hash check condition by comparing Const hash patterns instead of absolute +// hash values. For example, let's assume we have three Consts located at idx1, +// idx3, and idx6, where their corresponding hashes are hash1, hash2, and hash1 +// in the old merged map below: +// Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)] +// Current: [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')] +// If the current function also has three Consts in the same locations, +// with hash sequences hash1', hash2', and hash1' where the first and third +// are the same as the old hash sequences, we consider them matched. +static bool checkConstHashCompatible( + const DenseMap<IndexPair, stable_hash> &OldInstOpndIndexToConstHash, + const DenseMap<IndexPair, stable_hash> &CurrInstOpndIndexToConstHash) { + + DenseMap<stable_hash, stable_hash> OldHashToCurrHash; + for (const auto &[Index, OldHash] : OldInstOpndIndexToConstHash) { + auto It = CurrInstOpndIndexToConstHash.find(Index); + if (It == CurrInstOpndIndexToConstHash.end()) + return false; + + auto CurrHash = It->second; + auto J = OldHashToCurrHash.find(OldHash); + if (J == OldHashToCurrHash.end()) + OldHashToCurrHash.insert({OldHash, CurrHash}); + else if (J->second != CurrHash) + return false; + } + + return true; +} + +// Validate the locations pointed by a param has the same hash and Constant. 
+static bool +checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF, + const IndexInstrMap &IndexInstruction, + const ParamLocsVecTy &ParamLocsVec) { + for (auto &ParamLocs : ParamLocsVec) { + std::optional<stable_hash> OldHash; + std::optional<Constant *> OldConst; + for (auto &Loc : ParamLocs) { + assert(SF.IndexOperandHashMap->count(Loc)); + auto CurrHash = SF.IndexOperandHashMap.get()->at(Loc); + auto [InstIndex, OpndIndex] = Loc; + assert(InstIndex < IndexInstruction.size()); + const auto *Inst = IndexInstruction.lookup(InstIndex); + auto *CurrConst = cast<Constant>(Inst->getOperand(OpndIndex)); + if (!OldHash) { + OldHash = CurrHash; + OldConst = CurrConst; + } else if (CurrConst != *OldConst || CurrHash != *OldHash) { + return false; + } + } + } + return true; +} + +static ParamLocsVecTy computeParamInfo( + const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> + &SFS) { + std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs; + auto &RSF = *SFS[0]; + unsigned StableFunctionCount = SFS.size(); + + for (auto &[IndexPair, Hash] : *RSF.IndexOperandHashMap) { + // Const hash sequence across stable functions. + // We will allocate a parameter per unique hash sequence. + // Note: std::vector is used because SmallVector cannot be the map key. + std::vector<stable_hash> ConstHashSeq; + ConstHashSeq.push_back(Hash); + bool Identical = true; + for (unsigned J = 1; J < StableFunctionCount; ++J) { + auto &SF = SFS[J]; + auto SHash = SF->IndexOperandHashMap->at(IndexPair); + if (Hash != SHash) + Identical = false; + ConstHashSeq.push_back(SHash); + } + + if (Identical) + continue; + + // For each unique Const hash sequence (parameter), add the locations. 
+ HashSeqToLocs[ConstHashSeq].push_back(IndexPair); + } + + ParamLocsVecTy ParamLocsVec; + for (auto &[HashSeq, Locs] : HashSeqToLocs) { + ParamLocsVec.push_back(std::move(Locs)); + llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) { + return L[0] < R[0]; + }); + } + return ParamLocsVec; +} + +bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { + bool Changed = false; + + // Build a map from stable function name to function. + StringMap<Function *> StableNameToFuncMap; + for (auto &F : M) + StableNameToFuncMap[get_stable_name(F.getName())] = &F; + // Track merged functions + DenseSet<Function *> MergedFunctions; + + auto ModId = M.getModuleIdentifier(); + for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) { + // Parameter locations based on the unique hash sequences + // across the candidates. + std::optional<ParamLocsVecTy> ParamLocsVec; + Function *MergedFunc = nullptr; + std::string MergedModId; + SmallVector<FuncMergeInfo> FuncMergeInfos; + for (auto &SF : SFS) { + // Get the function from the stable name. + auto I = StableNameToFuncMap.find( + *FunctionMap->getNameForId(SF->FunctionNameId)); + if (I == StableNameToFuncMap.end()) + continue; + Function *F = I->second; + assert(F); + // Skip if the function has been merged before. + if (MergedFunctions.count(F)) + continue; + // Consider the function if it is eligible for merging. 
+ if (!isEligibleFunction(F)) + continue; + + auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp); + uint64_t FuncHash = FI.FunctionHash; + if (Hash != FuncHash) { + ++NumMismatchedFunctionHash; + continue; + } + + if (SF->InstCount != FI.IndexInstruction->size()) { + ++NumMismatchedInstCount; + continue; + } + bool HasValidSharedConst = true; + for (auto &[Index, Hash] : *SF->IndexOperandHashMap) { + auto [InstIndex, OpndIndex] = Index; + assert(InstIndex < FI.IndexInstruction->size()); + auto *Inst = FI.IndexInstruction->lookup(InstIndex); + if (!ignoreOp(Inst, OpndIndex)) { + HasValidSharedConst = false; + break; + } + } + if (!HasValidSharedConst) { + ++NumMismatchedConstHash; + continue; + } + if (!checkConstHashCompatible(*SF->IndexOperandHashMap, + *FI.IndexOperandHashMap)) { + ++NumMismatchedConstHash; + continue; + } + if (!ParamLocsVec.has_value()) { + ParamLocsVec = computeParamInfo(SFS); + LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash + << " with Params " << ParamLocsVec->size() << "\n"); + } + if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction, + *ParamLocsVec)) { + ++NumMismatchedConstHash; + continue; + } + + if (MergedFunc) { + // Check if the matched functions fall into the same (first) module. + // This module check is not strictly necessary as the functions can move + // around. We just want to avoid merging functions from different + // modules than the first one in the function map, as they may not end + // up with being ICFed by the linker. 
+ if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) { + ++NumMismatchedModuleId; + continue; + } + } else { + MergedFunc = F; + MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId); + } + + FuncMergeInfos.emplace_back(SF.get(), F, std::move(FI.IndexInstruction)); + MergedFunctions.insert(F); + } + unsigned FuncMergeInfoSize = FuncMergeInfos.size(); + if (FuncMergeInfoSize == 0) + continue; + + LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count " + << FuncMergeInfoSize << " in " << ModId << "\n"); + + for (auto &FMI : FuncMergeInfos) { + Changed = true; + + // We've already validated all locations of constant operands pointed by + // the parameters. Populate parameters pointing to the original constants. + SmallVector<Constant *> Params; + SmallVector<Type *> ParamTypes; + for (auto &ParamLocs : *ParamLocsVec) { + assert(!ParamLocs.empty()); + auto &[InstIndex, OpndIndex] = ParamLocs[0]; + auto *Inst = FMI.IndexInstruction->lookup(InstIndex); + auto *Opnd = cast<Constant>(Inst->getOperand(OpndIndex)); + Params.push_back(Opnd); + ParamTypes.push_back(Opnd->getType()); + } + + // Create a merged function derived from the current function. + Function *MergedFunc = + createMergedFunction(FMI, ParamTypes, *ParamLocsVec); + + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash + << ") " << MergedFunc->getName() << " generated from " + << FMI.F->getName() << ":\n"; + MergedFunc->dump(); + }); + + // Transform the current function into a thunk that calls the merged + // function. + createThunk(FMI, Params, MergedFunc); + LLVM_DEBUG({ + dbgs() << "[GlobalMergeFunc] Thunk generated: \n"; + FMI.F->dump(); + }); + ++NumMergedFunctions; + } + } + + return Changed; +} + +void GlobalMergeFunc::initializeMergerMode(const Module &M) { + // Initialize the local function map regardless of the merger mode. + LocalFunctionMap = std::make_unique<StableFunctionMap>(); + + // Disable codegen data for merging. 
The local merge is still enabled. + if (DisableCGDataForMerging) + return; + + // (Full)LTO module does not have functions added to the index. + // In this case, we run a local merger without using codegen data. + if (Index && !Index->hasExportedFunctions(M)) + return; + + if (cgdata::emitCGData()) + MergerMode = HashFunctionMode::BuildingHashFuncion; + else if (cgdata::hasStableFunctionMap()) + MergerMode = HashFunctionMode::UsingHashFunction; +} + +void GlobalMergeFunc::emitFunctionMap(Module &M) { + LLVM_DEBUG(dbgs() << "Emit function map. Size: " << LocalFunctionMap->size() + << "\n"); + // No need to emit the function map if it is empty. + if (LocalFunctionMap->empty()) + return; + SmallVector<char> Buf; + raw_svector_ostream OS(Buf); + + StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get()); + + std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer( + OS.str(), "in-memory stable function map", false); + + Triple TT(M.getTargetTriple()); + embedBufferInModule(M, *Buffer.get(), + getCodeGenDataSectionName(CG_merge, TT.getObjectFormat()), + Align(4)); +} + +bool GlobalMergeFunc::run(Module &M) { + initializeMergerMode(M); + + const StableFunctionMap *FuncMap; + if (MergerMode == HashFunctionMode::UsingHashFunction) { + // Use the prior CG data to optimistically create global merge candidates. + FuncMap = cgdata::getStableFunctionMap(); + } else { + analyze(M); + // Emit the local function map to the custom section, __llvm_merge before + // finalizing it. 
+ if (MergerMode == HashFunctionMode::BuildingHashFuncion) + emitFunctionMap(M); + LocalFunctionMap->finalize(); + FuncMap = LocalFunctionMap.get(); + } + + return merge(M, FuncMap); +} + +namespace { + +class GlobalMergeFuncPassWrapper : public ModulePass { + +public: + static char ID; + + GlobalMergeFuncPassWrapper(); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>(); + AU.setPreservesAll(); + ModulePass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "Global Merge Functions"; } + + bool runOnModule(Module &M) override; +}; + +} // namespace + +char GlobalMergeFuncPassWrapper::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalMergeFuncPassWrapper, "global-merge-func", + "Global merge function pass", false, false) +INITIALIZE_PASS_END(GlobalMergeFuncPassWrapper, "global-merge-func", + "Global merge function pass", false, false) + +namespace llvm { +ModulePass *createGlobalMergeFuncPass() { + return new GlobalMergeFuncPassWrapper(); +} +} // namespace llvm + +GlobalMergeFuncPassWrapper::GlobalMergeFuncPassWrapper() : ModulePass(ID) { + initializeGlobalMergeFuncPassWrapperPass( + *llvm::PassRegistry::getPassRegistry()); +} + +bool GlobalMergeFuncPassWrapper::runOnModule(Module &M) { + const ModuleSummaryIndex *Index = nullptr; + if (auto *IndexWrapperPass = + getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) + Index = IndexWrapperPass->getIndex(); + + return GlobalMergeFunc(Index).run(M); +} + +PreservedAnalyses GlobalMergeFuncPass::run(Module &M, + AnalysisManager<Module> &AM) { + ModuleSummaryIndex *Index = &(AM.getResult<ModuleSummaryIndexAnalysis>(M)); + bool Changed = GlobalMergeFunc(Index).run(M); + return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 782ed60..5f3fe12 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -141,6 +141,9 @@ static cl::opt<RunOutliner> EnableMachineOutliner( "Disable all outlining"), // Sentinel value for unspecified option. clEnumValN(RunOutliner::AlwaysOutline, "", ""))); +static cl::opt<bool> EnableGlobalMergeFunc( + "enable-global-merge-func", cl::Hidden, + cl::desc("Enable global merge functions that are based on hash function")); // Disable the pass to fix unwind information. Whether the pass is included in // the pipeline is controlled via the target options, this option serves as // manual override. @@ -489,6 +492,7 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() { SET_BOOLEAN_OPTION(EarlyLiveIntervals) SET_BOOLEAN_OPTION(EnableBlockPlacementStats) + SET_BOOLEAN_OPTION(EnableGlobalMergeFunc) SET_BOOLEAN_OPTION(EnableImplicitNullChecks) SET_BOOLEAN_OPTION(EnableMachineOutliner) SET_BOOLEAN_OPTION(MISchedPostRA) @@ -884,6 +888,9 @@ void TargetPassConfig::addIRPasses() { // Convert conditional moves to conditional jumps when profitable. 
if (getOptLevel() != CodeGenOptLevel::None && !DisableSelectOptimize) addPass(createSelectOptimizePass()); + + if (EnableGlobalMergeFunc) + addPass(createGlobalMergeFuncPass()); } /// Turn exception handling constructs into something the code generators can diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 47f012e..1bdf88e 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -91,6 +91,7 @@ #include "llvm/CodeGen/FinalizeISel.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GlobalMerge.h" +#include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/CodeGen/HardwareLoops.h" #include "llvm/CodeGen/IndirectBrExpand.h" #include "llvm/CodeGen/InterleavedAccess.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index f016987..5a7c327 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Passes/OptimizationLevel.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index ca3fea4..21d53e5 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -70,6 +70,7 @@ MODULE_PASS("extract-blocks", BlockExtractorPass({}, false)) MODULE_PASS("expand-variadics", ExpandVariadicsPass(ExpandVariadicsMode::Disable)) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) +MODULE_PASS("global-merge-func", GlobalMergeFuncPass()) MODULE_PASS("globalopt", GlobalOptPass()) MODULE_PASS("globalsplit", GlobalSplitPass()) MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass()) diff --git 
a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll new file mode 100644 index 0000000..660ffe6 --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll @@ -0,0 +1,77 @@ +; This test checks if two similar functions, f1 and f2, can be merged locally within a single module +; while parameterizing a difference in their global variables, g1 and g2. +; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm, +; which are tail-called from thunks f1 and f2 respectively. +; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (ICF). + +; RUN: opt -S --passes=global-merge-func %s | FileCheck %s + +; A merging instance is created with an additional parameter. +; CHECK: define internal i32 @f1.Tgm(i32 %0, ptr %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: %idxprom = sext i32 %0 to i64 +; CHECK-NEXT: %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i64 0, i64 %idxprom +; CHECK-NEXT: %2 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: %3 = load volatile i32, ptr %1, align 4 +; CHECK-NEXT: %mul = mul nsw i32 %3, %2 +; CHECK-NEXT: %add = add nsw i32 %mul, 1 +; CHECK-NEXT: ret i32 %add + +; The original function becomes a thunk passing g1. +; CHECK: define i32 @f1(i32 %a) +; CHECK-NEXT: %1 = tail call i32 @f1.Tgm(i32 %a, ptr @g1) +; CHECK-NEXT: ret i32 %1 + +; The same sequence is produced for f2.Tgm. +; CHECK: define internal i32 @f2.Tgm(i32 %0, ptr %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: %idxprom = sext i32 %0 to i64 +; CHECK-NEXT: %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i64 0, i64 %idxprom +; CHECK-NEXT: %2 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: %3 = load volatile i32, ptr %1, align 4 +; CHECK-NEXT: %mul = mul nsw i32 %3, %2 +; CHECK-NEXT: %add = add nsw i32 %mul, 1 +; CHECK-NEXT: ret i32 %add + +; The original function becomes a thunk passing g2.
+; CHECK: define i32 @f2(i32 %a) +; CHECK-NEXT: %1 = tail call i32 @f2.Tgm(i32 %a, ptr @g2) +; CHECK-NEXT: ret i32 %1 + +; RUN: llc -enable-global-merge-func=true < %s | FileCheck %s --check-prefix=MERGE +; RUN: llc -enable-global-merge-func=false < %s | FileCheck %s --check-prefix=NOMERGE + +; MERGE: _f1.Tgm +; MERGE: _f2.Tgm + +; NOMERGE-NOT: _f1.Tgm +; NOMERGE-NOT: _f2.Tgm + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll new file mode 100644 index 0000000..da756e7 --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll @@ -0,0 +1,82 @@ +; This test demonstrates how similar functions are handled during global function merging. +; Currently, we do not attempt to share a merged function for identical sequences. +; Instead, each merging instance is created uniquely. + +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; First, run with -codegen-data-generate=true to generate the cgdata in the object files.
+; Using llvm-cgdata, merge the cg data. +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-write \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-write.1 %tout-write.2 + +; Now run with -codegen-data-use-path=%tout.cgdata to optimize the binary. +; Each module has its own merging instance as it is matched against the merged cgdata. +; RUN: llvm-lto2 run -enable-global-merge-func=true \ +; RUN: -codegen-data-use-path=%tout.cgdata \ +; RUN: %t-foo.bc %t-goo.bc -o %tout-read \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-read.1 | FileCheck %s --check-prefix=READ1 +; RUN: llvm-nm %tout-read.2 | FileCheck %s --check-prefix=READ2 +; RUN: llvm-objdump -d %tout-read.1 | FileCheck %s --check-prefix=THUNK1 +; RUN: llvm-objdump -d %tout-read.2 | FileCheck %s --check-prefix=THUNK2 + +; READ1: _f1.Tgm +; READ2: _f2.Tgm + +; THUNK1: <_f1>: +; THUNK1-NEXT: adrp x1, +; THUNK1-NEXT: ldr x1, [x1] +; THUNK1-NEXT: b + +; THUNK2: <_f2>: +; THUNK2-NEXT: adrp x1, +; THUNK2-NEXT: ldr x1, [x1] +; THUNK2-NEXT: b + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll 
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll new file mode 100644 index 0000000..06880e3 --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll @@ -0,0 +1,68 @@ +; TODO: This test checks how similar functions are handled during global function merging +; by repeating the codegen via -codegen-data-thinlto-two-rounds=true. + +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-thinlto-two-rounds=true %t-foo.bc %t-goo.bc -o %tout \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout.1 | FileCheck %s --check-prefix=OUT1 +; RUN: llvm-nm %tout.2 | FileCheck %s --check-prefix=OUT2 +; RUN: llvm-objdump -d %tout.1 | FileCheck %s --check-prefix=THUNK1 +; RUN: llvm-objdump -d %tout.2 | FileCheck %s --check-prefix=THUNK2 + +; OUT1: _f1.Tgm +; OUT2: _f2.Tgm + +; THUNK1: <_f1>: +; THUNK1-NEXT: adrp x1, +; THUNK1-NEXT: ldr x1, [x1] +; THUNK1-NEXT: b + +; THUNK2: <_f2>: +; THUNK2-NEXT: adrp x1, +; THUNK2-NEXT: ldr x1, [x1] +; THUNK2-NEXT: b + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target
triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll new file mode 100644 index 0000000..a4022eb --- /dev/null +++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll @@ -0,0 +1,97 @@ +; This test verifies whether a stable function is encoded into the __llvm_merge section +; when the -codegen-data-generate flag is used under -enable-global-merge-func=true. 
+ +; RUN: rm -rf %t; split-file %s %t + +; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc +; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=false %t-foo.bc %t-goo.bc -o %tout-nowrite \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=NOWRITE +; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=NOWRITE + +; No merge instance is locally created as each module has a singleton function. +; NOWRITE-NOT: _f1.Tgm +; NOWRITE-NOT: _f2.Tgm + +; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-nowrite \ +; RUN: -r %t-foo.bc,_f1,px \ +; RUN: -r %t-goo.bc,_f2,px \ +; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \ +; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l +; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=WRITE +; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=WRITE +; RUN: llvm-objdump -h %tout-nowrite.1 | FileCheck %s --check-prefix=SECTNAME +; RUN: llvm-objdump -h %tout-nowrite.2 | FileCheck %s --check-prefix=SECTNAME + +; In write mode, no merging happens yet for each module. +; We only create stable functions and publish them into the __llvm_merge section for each object. +; WRITE-NOT: _f1.Tgm +; WRITE-NOT: _f2.Tgm +; SECTNAME: __llvm_merge + +; Merge the cgdata using llvm-cgdata. +; We now validate the content of the merged cgdata. +; Two functions have the same hash with only one different constant at the same location.
+; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2 +; RUN: llvm-cgdata --convert %tout.cgdata -o - | FileCheck %s + +; CHECK: - Hash: [[#%d,HASH:]] +; CHECK-NEXT: FunctionName: f1 +; CHECK-NEXT: ModuleName: {{.*}} +; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT:]] +; CHECK-NEXT: IndexOperandHashes: +; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX:]] +; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX:]] +; CHECK-NEXT: OpndHash: {{.*}} + +; CHECK: - Hash: [[#%d,HASH]] +; CHECK-NEXT: FunctionName: f2 +; CHECK-NEXT: ModuleName: {{.*}} +; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT]] +; CHECK-NEXT: IndexOperandHashes: +; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX]] +; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX]] +; CHECK-NEXT: OpndHash: {{.*}} + +;--- foo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +;--- goo.ll +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test 
b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test index b9bf067..f7e078b 100644 --- a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test +++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test @@ -21,7 +21,7 @@ RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-funcmap-bytes.txt)/g" %t/merge-both-hasht RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcmap.ll -o %t/merge-both-hashtree-funcmap.o # Merge an object file having cgdata (__llvm_outline and __llvm_merge) -RUN: llvm-cgdata -m %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata +RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s CHECK: Outlined hash tree: diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test index f643c8d..c1881bc 100644 --- a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test @@ -21,7 +21,7 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o # Merge the archive into the codegen data file. 
-RUN: llvm-cgdata --merge %t/merge-archive.a -o %t/merge-archive.cgdata +RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive.cgdata RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test index c8acf1f..301ee6d 100644 --- a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test @@ -15,7 +15,7 @@ RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o -RUN: llvm-cgdata --merge %t/merge-concat.o -o %t/merge-concat.cgdata +RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat.cgdata RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s CHECK: Stable function map: diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test index 3ae67f0..98a9148 100644 --- a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test @@ -18,7 +18,7 @@ RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o # Merge two object files into the codegen data file. 
-RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata +RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s CHECK: Stable function map: diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test index 6a4e635..2075fac 100644 --- a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test @@ -13,7 +13,7 @@ RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-temp RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o # Merge an object file having cgdata (__llvm_merge) -RUN: llvm-cgdata -m %t/merge-single.o -o %t/merge-single.cgdata +RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single.cgdata RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s CHECK: Stable function map: CHECK-NEXT: Unique hash Count: 1 diff --git a/llvm/tools/llvm-cgdata/Opts.td b/llvm/tools/llvm-cgdata/Opts.td index b2cfc6a..8da933f 100644 --- a/llvm/tools/llvm-cgdata/Opts.td +++ b/llvm/tools/llvm-cgdata/Opts.td @@ -24,6 +24,7 @@ def : F<"m", "Alias for --merge">, Alias<merge>, Group<action_group>; // Additional options def cgdata_version : FF<"cgdata-version", "Display the cgdata version">; +def skip_trim : FF<"skip-trim", "Skip trimming content when merging the cgdata">; def output : Option<["--"], "output", KIND_SEPARATE>, HelpText<"Specify the name for the output file to be created">, MetaVarName<"<file>">; def : JoinedOrSeparate<["-"], "o">, Alias<output>, MetaVarName<"<file>">, HelpText<"Alias for --output">; diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp index 0931cad..d33459b 100644 --- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp +++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp @@ -76,6 +76,7 @@ static StringRef ToolName; static StringRef OutputFilename = 
"-"; static StringRef Filename; static bool ShowCGDataVersion; +static bool SkipTrim; static CGDataAction Action; static std::optional<CGDataFormat> OutputFormat; static std::vector<std::string> InputFilenames; @@ -214,7 +215,7 @@ static int merge_main(int argc, const char *argv[]) { if (!Result) exitWithError("failed to merge codegen data files."); - GlobalFunctionMapRecord.finalize(); + GlobalFunctionMapRecord.finalize(SkipTrim); CodeGenDataWriter Writer; if (!GlobalOutlineRecord.empty()) @@ -301,6 +302,7 @@ static void parseArgs(int argc, char **argv) { } ShowCGDataVersion = Args.hasArg(OPT_cgdata_version); + SkipTrim = Args.hasArg(OPT_skip_trim); if (opt::Arg *A = Args.getLastArg(OPT_format)) { StringRef OF = A->getValue(); diff --git a/llvm/unittests/CGData/StableFunctionMapTest.cpp b/llvm/unittests/CGData/StableFunctionMapTest.cpp index 5e178dc..d551ac8 100644 --- a/llvm/unittests/CGData/StableFunctionMapTest.cpp +++ b/llvm/unittests/CGData/StableFunctionMapTest.cpp @@ -108,8 +108,8 @@ TEST(StableFunctionMap, Finalize2) { TEST(StableFunctionMap, Finalize3) { StableFunctionMap Map; - StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 1}, 1}}}; - StableFunction Func2{1, "Func2", "Mod2", 2, {{{0, 1}, 2}, {{1, 1}, 1}}}; + StableFunction Func1{1, "Func1", "Mod1", 12, {{{0, 1}, 3}, {{1, 1}, 1}}}; + StableFunction Func2{1, "Func2", "Mod2", 12, {{{0, 1}, 2}, {{1, 1}, 1}}}; Map.insert(Func1); Map.insert(Func2); |