From 23ec5782c3cc0d77b5e44285e5124cd4a3ffeeef Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 12 Apr 2022 11:37:42 -0700 Subject: [Bitcode] materialize Functions early when BlockAddress taken IRLinker builds a work list of functions to materialize, then moves them from a source module to a destination module one at a time. This is a problem for blockaddress Constants, since they need not refer to the function they are used in; IPSCCP is quite good at sinking these constants deep into other functions when passed as arguments. This would lead to curious errors during LTO: ld.lld: error: Never resolved function from blockaddress ... based on the ordering of function definitions in IR. The problem was that IRLinker would basically do: for function f in worklist: materialize f splice f from source module to destination module in one pass, with Functions being lazily added to the running worklist. This confuses BitcodeReader, which cannot disambiguate whether a blockaddress is referring to a function which has not yet been parsed ("materialized") or is simply empty because its body was spliced out. This causes BitcodeReader to insert Functions into its BasicBlockFwdRefs list incorrectly, as it will never re-materialize an already materialized (but spliced out) function. Because of the possibility that blockaddress Constants may appear in Functions other than the ones they reference, this patch adds a new bitcode function code FUNC_CODE_BLOCKADDR_USERS that is a simple list of Functions that contain BlockAddress Constants that refer back to this Function, rather then the Function they are scoped in. We then materialize those functions when materializing `f` from the example loop above. This might over-materialize Functions should the user of BitcodeReader ultimately decide not to link those Functions, but we can at least now we can avoid this ordering related issue with blockaddresses. Fixes: https://github.com/llvm/llvm-project/issues/52787 Fixes: https://github.com/ClangBuiltLinux/linux/issues/1215 Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D120781 --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'llvm/lib/Bitcode/Writer/BitcodeWriter.cpp') diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index c76294d..79c7685 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -3359,8 +3360,10 @@ void ModuleBitcodeWriter::writeFunction( bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; + SmallPtrSet BlockAddressUsers; + // Finally, emit all the instructions, in order. - for (const BasicBlock &BB : F) + for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { writeInstruction(I, InstID, Vals); @@ -3392,6 +3395,25 @@ void ModuleBitcodeWriter::writeFunction( LastDL = DL; } + if (BlockAddress *BA = BlockAddress::lookup(&BB)) { + for (User *U : BA->users()) { + if (auto *I = dyn_cast(U)) { + Function *P = I->getParent()->getParent(); + if (P != &F) + BlockAddressUsers.insert(P); + } + } + } + } + + if (!BlockAddressUsers.empty()) { + SmallVector Record; + Record.reserve(BlockAddressUsers.size()); + for (Function *F : BlockAddressUsers) + Record.push_back(VE.getValueID(F)); + Stream.EmitRecord(bitc::FUNC_CODE_BLOCKADDR_USERS, Record); + } + // Emit names for all the instructions etc. if (auto *Symtab = F.getValueSymbolTable()) writeFunctionLevelValueSymbolTable(*Symtab); -- cgit v1.1