diff options
author | modimo <modimo@fb.com> | 2023-07-13 19:02:52 -0700 |
---|---|---|
committer | modimo <modimo@fb.com> | 2023-09-18 15:51:49 -0700 |
commit | 272bd6f9cc86bf6b4dd6bd51e85c46db10e8b86a (patch) | |
tree | fd79fdbbbb65537e9e78ac848bfacd045b59f7d1 /llvm/lib | |
parent | 4176ce61f156c8daa1e6bf8cc86d5e61e9413149 (diff) | |
download | llvm-272bd6f9cc86bf6b4dd6bd51e85c46db10e8b86a.zip llvm-272bd6f9cc86bf6b4dd6bd51e85c46db10e8b86a.tar.gz llvm-272bd6f9cc86bf6b4dd6bd51e85c46db10e8b86a.tar.bz2 |
[WPD][LLD] Add option to validate RTTI is enabled on all native types and prevent devirtualization on types with native RTTI
Discussion about this approach: https://discourse.llvm.org/t/rfc-safer-whole-program-class-hierarchy-analysis/65144/18
When enabling whole program devirtualization (WPD) in an environment where native binaries are present, types we want to optimize can be derived from inside these native files, and devirtualizing them can lead to correctness issues. RTTI can be used to determine all such types in native files and exclude them from WPD, providing a safe, checked way to enable WPD.
The approach is:
1. In the linker, identify if RTTI is available for all native types. If it is not, then under `--lto-validate-all-vtables-have-type-infos` the `--lto-whole-program-visibility` option is automatically disabled. This is done by examining all .symtab symbols in object files and .dynsym symbols in DSOs for vtable (_ZTV) and typeinfo (_ZTI) symbols and ensuring that every vtable symbol has a matching typeinfo symbol.
2. During thinlink, if `--lto-validate-all-vtables-have-type-infos` is set and RTTI is available for all native types, identify all typename (_ZTS) symbols via their corresponding typeinfo (_ZTI) symbols that are used natively or outside of our summary and exclude them from WPD.
Testing:
ninja check-all
large Meta service that uses boost, glog and libstdc++.so runs successfully with WPD via --lto-whole-program-visibility. Previously, native types in boost caused incorrect devirtualization that led to crashes.
Reviewed By: MaskRay, tejohnson
Differential Revision: https://reviews.llvm.org/D155659
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/LTO/LTO.cpp | 55 | ||||
-rw-r--r-- | llvm/lib/LTO/LTOCodeGenerator.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 76 |
4 files changed, 132 insertions, 21 deletions
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 3e008ed..4a64aa4 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1270,13 +1270,27 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex); + bool WholeProgramVisibilityEnabledInLTO = + Conf.HasWholeProgramVisibility && + // If validation is enabled, upgrade visibility only when all vtables + // have typeinfos. + (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos); + + // This returns true when the name is local or not defined. Locals are + // expected to be handled separately. + auto IsVisibleToRegularObj = [&](StringRef name) { + auto It = GlobalResolutions.find(name); + return (It == GlobalResolutions.end() || It->second.VisibleOutsideSummary); + }; + // If allowed, upgrade public vcall visibility metadata to linkage unit // visibility before whole program devirtualization in the optimizer. - updateVCallVisibilityInModule(*RegularLTO.CombinedModule, - Conf.HasWholeProgramVisibility, - DynamicExportSymbols); + updateVCallVisibilityInModule( + *RegularLTO.CombinedModule, WholeProgramVisibilityEnabledInLTO, + DynamicExportSymbols, Conf.ValidateAllVtablesHaveTypeInfos, + IsVisibleToRegularObj); updatePublicTypeTestCalls(*RegularLTO.CombinedModule, - Conf.HasWholeProgramVisibility); + WholeProgramVisibilityEnabledInLTO); if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule)) @@ -1683,13 +1697,38 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, std::set<GlobalValue::GUID> ExportedGUIDs; - if (hasWholeProgramVisibility(Conf.HasWholeProgramVisibility)) + bool WholeProgramVisibilityEnabledInLTO = + Conf.HasWholeProgramVisibility && + // If validation is enabled, upgrade visibility only when all vtables + // have typeinfos. 
+ (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos); + if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) ThinLTO.CombinedIndex.setWithWholeProgramVisibility(); + + // If we're validating, get the vtable symbols that should not be + // upgraded because they correspond to typeIDs outside of index-based + // WPD info. + DenseSet<GlobalValue::GUID> VisibleToRegularObjSymbols; + if (WholeProgramVisibilityEnabledInLTO && + Conf.ValidateAllVtablesHaveTypeInfos) { + // This returns true when the name is local or not defined. Locals are + // expected to be handled separately. + auto IsVisibleToRegularObj = [&](StringRef name) { + auto It = GlobalResolutions.find(name); + return (It == GlobalResolutions.end() || + It->second.VisibleOutsideSummary); + }; + + getVisibleToRegularObjVtableGUIDs(ThinLTO.CombinedIndex, + VisibleToRegularObjSymbols, + IsVisibleToRegularObj); + } + // If allowed, upgrade public vcall visibility to linkage unit visibility in // the summaries before whole program devirtualization below. - updateVCallVisibilityInIndex(ThinLTO.CombinedIndex, - Conf.HasWholeProgramVisibility, - DynamicExportSymbols); + updateVCallVisibilityInIndex( + ThinLTO.CombinedIndex, WholeProgramVisibilityEnabledInLTO, + DynamicExportSymbols, VisibleToRegularObjSymbols); // Perform index-based WPD. This will return immediately if there are // no index entries in the typeIdMetadata map (e.g. if we are instead diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index 52a4a9b..52d8fff 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -605,11 +605,14 @@ bool LTOCodeGenerator::optimize() { // pipeline run below. updatePublicTypeTestCalls(*MergedModule, /* WholeProgramVisibilityEnabledInLTO */ false); - updateVCallVisibilityInModule(*MergedModule, - /* WholeProgramVisibilityEnabledInLTO */ false, - // FIXME: This needs linker information via a - // TBD new interface. 
- /* DynamicExportSymbols */ {}); + updateVCallVisibilityInModule( + *MergedModule, + /* WholeProgramVisibilityEnabledInLTO */ false, + // FIXME: These need linker information via a + // TBD new interface. + /*DynamicExportSymbols=*/{}, + /*ValidateAllVtablesHaveTypeInfos=*/false, + /*IsVisibleToRegularObj=*/[](StringRef) { return true; }); // We always run the verifier once on the merged module, the `DisableVerify` // parameter only applies to subsequent verify. diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 02a4535..acff1e2 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -1057,11 +1057,14 @@ void ThinLTOCodeGenerator::run() { // via the internal option. Must be done before WPD below. if (hasWholeProgramVisibility(/* WholeProgramVisibilityEnabledInLTO */ false)) Index->setWithWholeProgramVisibility(); + + // FIXME: This needs linker information via a TBD new interface updateVCallVisibilityInIndex(*Index, - /* WholeProgramVisibilityEnabledInLTO */ false, - // FIXME: This needs linker information via a + /*WholeProgramVisibilityEnabledInLTO=*/false, + // FIXME: These need linker information via a // TBD new interface. - /* DynamicExportSymbols */ {}); + /*DynamicExportSymbols=*/{}, + /*VisibleToRegularObjSymbols=*/{}); // Perform index-based WPD. This will return immediately if there are // no index entries in the typeIdMetadata map (e.g. 
if we are instead diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 311a421..ae3ec7c 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -782,12 +782,52 @@ bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) { !DisableWholeProgramVisibility; } +static bool +typeIDVisibleToRegularObj(StringRef TypeID, + function_ref<bool(StringRef)> IsVisibleToRegularObj) { + // TypeID for member function pointer type is an internal construct + // and won't exist in IsVisibleToRegularObj. The full TypeID + // will be present and participate in invalidation. + if (TypeID.ends_with(".virtual")) + return false; + + // TypeID that doesn't start with Itanium mangling (_ZTS) will be + // non-externally visible types which cannot interact with + // external native files. See CodeGenModule::CreateMetadataIdentifierImpl. + if (!TypeID.consume_front("_ZTS")) + return false; + + // TypeID is keyed off the type name symbol (_ZTS). However, the native + // object may not contain this symbol if it does not contain a key + // function for the base type and thus only contains a reference to the + // type info (_ZTI). To catch this case we query using the type info + // symbol corresponding to the TypeID. 
+ std::string typeInfo = ("_ZTI" + TypeID).str(); + return IsVisibleToRegularObj(typeInfo); +} + +static bool +skipUpdateDueToValidation(GlobalVariable &GV, + function_ref<bool(StringRef)> IsVisibleToRegularObj) { + SmallVector<MDNode *, 2> Types; + GV.getMetadata(LLVMContext::MD_type, Types); + + for (auto Type : Types) + if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get())) + return typeIDVisibleToRegularObj(TypeID->getString(), + IsVisibleToRegularObj); + + return false; +} + /// If whole program visibility asserted, then upgrade all public vcall /// visibility metadata on vtable definitions to linkage unit visibility in /// Module IR (for regular or hybrid LTO). void llvm::updateVCallVisibilityInModule( Module &M, bool WholeProgramVisibilityEnabledInLTO, - const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) { + const DenseSet<GlobalValue::GUID> &DynamicExportSymbols, + bool ValidateAllVtablesHaveTypeInfos, + function_ref<bool(StringRef)> IsVisibleToRegularObj) { if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) return; for (GlobalVariable &GV : M.globals()) { @@ -798,7 +838,13 @@ void llvm::updateVCallVisibilityInModule( GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic && // Don't upgrade the visibility for symbols exported to the dynamic // linker, as we have no information on their eventual use. - !DynamicExportSymbols.count(GV.getGUID())) + !DynamicExportSymbols.count(GV.getGUID()) && + // With validation enabled, we want to exclude symbols visible to + // regular objects. Local symbols will be in this group due to the + // current implementation but those with VCallVisibilityTranslationUnit + // will have already been marked in clang so are unaffected. 
+ !(ValidateAllVtablesHaveTypeInfos && + skipUpdateDueToValidation(GV, IsVisibleToRegularObj))) GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit); } } @@ -830,12 +876,26 @@ void llvm::updatePublicTypeTestCalls(Module &M, } } +/// Based on typeID string, get all associated vtable GUIDS that are +/// visible to regular objects. +void llvm::getVisibleToRegularObjVtableGUIDs( + ModuleSummaryIndex &Index, + DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols, + function_ref<bool(StringRef)> IsVisibleToRegularObj) { + for (const auto &typeID : Index.typeIdCompatibleVtableMap()) { + if (typeIDVisibleToRegularObj(typeID.first, IsVisibleToRegularObj)) + for (const TypeIdOffsetVtableInfo &P : typeID.second) + VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID()); + } +} + /// If whole program visibility asserted, then upgrade all public vcall /// visibility metadata on vtable definition summaries to linkage unit /// visibility in Module summary index (for ThinLTO). void llvm::updateVCallVisibilityInIndex( ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO, - const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) { + const DenseSet<GlobalValue::GUID> &DynamicExportSymbols, + const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) { if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) return; for (auto &P : Index) { @@ -848,6 +908,12 @@ void llvm::updateVCallVisibilityInIndex( if (!GVar || GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic) continue; + // With validation enabled, we want to exclude symbols visible to regular + // objects. Local symbols will be in this group due to the current + // implementation but those with VCallVisibilityTranslationUnit will have + // already been marked in clang so are unaffected. 
+ if (VisibleToRegularObjSymbols.count(P.first)) + continue; GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit); } } @@ -1041,8 +1107,8 @@ bool DevirtModule::tryFindVirtualCallTargets( } bool DevirtIndex::tryFindVirtualCallTargets( - std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo, - uint64_t ByteOffset) { + std::vector<ValueInfo> &TargetsForSlot, + const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) { for (const TypeIdOffsetVtableInfo &P : TIdInfo) { // Find a representative copy of the vtable initializer. // We can have multiple available_externally, linkonce_odr and weak_odr |