diff options
49 files changed, 1486 insertions, 204 deletions
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 1ec216b..f0dd7ba 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -620,8 +620,8 @@ void DWARFRewriter::updateDebugInfo() { uint32_t CUIndex = 0; std::mutex AccessMutex; // Needs to be invoked in the same order as CUs are processed. - auto createRangeLocListAddressWriters = - [&](DWARFUnit &CU) -> DebugLocWriter * { + llvm::DenseMap<uint64_t, uint64_t> LocListWritersIndexByCU; + auto createRangeLocListAddressWriters = [&](DWARFUnit &CU) { std::lock_guard<std::mutex> Lock(AccessMutex); const uint16_t DwarfVersion = CU.getVersion(); if (DwarfVersion >= 5) { @@ -641,7 +641,6 @@ void DWARFRewriter::updateDebugInfo() { RangeListsWritersByCU[*DWOId] = std::move(DWORangeListsSectionWriter); } AddressWritersByCU[CU.getOffset()] = std::move(AddrW); - } else { auto AddrW = std::make_unique<DebugAddrWriter>(&BC, CU.getAddressByteSize()); @@ -657,7 +656,7 @@ void DWARFRewriter::updateDebugInfo() { std::move(LegacyRangesSectionWriterByCU); } } - return LocListWritersByCU[CUIndex++].get(); + LocListWritersIndexByCU[CU.getOffset()] = CUIndex++; }; DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC, @@ -666,74 +665,70 @@ void DWARFRewriter::updateDebugInfo() { DWPState State; if (opts::WriteDWP) initDWPState(State); - auto processUnitDIE = [&](DWARFUnit *Unit, DIEBuilder *DIEBlder) { - // Check if the unit is a skeleton and we need special updates for it and - // its matching split/DWO CU. + auto processSplitCU = [&](DWARFUnit &Unit, DWARFUnit &SplitCU, + DIEBuilder &DIEBlder, + DebugRangesSectionWriter &TempRangesSectionWriter, + DebugAddrWriter &AddressWriter) { + DIEBuilder DWODIEBuilder(BC, &(SplitCU).getContext(), DebugNamesTable, + &Unit); + DWODIEBuilder.buildDWOUnit(SplitCU); + std::string DWOName = ""; + std::optional<std::string> DwarfOutputPath = + opts::DwarfOutputPath.empty() + ? 
std::nullopt + : std::optional<std::string>(opts::DwarfOutputPath.c_str()); + { + std::lock_guard<std::mutex> Lock(AccessMutex); + DWOName = DIEBlder.updateDWONameCompDir( + *StrOffstsWriter, *StrWriter, Unit, DwarfOutputPath, std::nullopt); + } + DebugStrOffsetsWriter DWOStrOffstsWriter(BC); + DebugStrWriter DWOStrWriter((SplitCU).getContext(), true); + DWODIEBuilder.updateDWONameCompDirForTypes( + DWOStrOffstsWriter, DWOStrWriter, SplitCU, DwarfOutputPath, DWOName); + DebugLoclistWriter DebugLocDWoWriter(Unit, Unit.getVersion(), true, + AddressWriter); + + updateUnitDebugInfo(SplitCU, DWODIEBuilder, DebugLocDWoWriter, + TempRangesSectionWriter, AddressWriter); + DebugLocDWoWriter.finalize(DWODIEBuilder, + *DWODIEBuilder.getUnitDIEbyUnit(SplitCU)); + if (Unit.getVersion() >= 5) + TempRangesSectionWriter.finalizeSection(); + + emitDWOBuilder(DWOName, DWODIEBuilder, *this, SplitCU, Unit, State, + DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter, + GDBIndexSection); + }; + auto processMainBinaryCU = [&](DWARFUnit &Unit, DIEBuilder &DIEBlder) { std::optional<DWARFUnit *> SplitCU; std::optional<uint64_t> RangesBase; - std::optional<uint64_t> DWOId = Unit->getDWOId(); + std::optional<uint64_t> DWOId = Unit.getDWOId(); if (DWOId) SplitCU = BC.getDWOCU(*DWOId); - DebugLocWriter *DebugLocWriter = createRangeLocListAddressWriters(*Unit); - DebugRangesSectionWriter *RangesSectionWriter = - Unit->getVersion() >= 5 ? RangeListsSectionWriter.get() - : LegacyRangesSectionWriter.get(); - DebugAddrWriter *AddressWriter = - AddressWritersByCU[Unit->getOffset()].get(); - // Skipping CUs that failed to load. - if (SplitCU) { - DIEBuilder DWODIEBuilder(BC, &(*SplitCU)->getContext(), DebugNamesTable, - Unit); - DWODIEBuilder.buildDWOUnit(**SplitCU); - std::string DWOName = ""; - std::optional<std::string> DwarfOutputPath = - opts::DwarfOutputPath.empty() - ? 
std::nullopt - : std::optional<std::string>(opts::DwarfOutputPath.c_str()); - { - std::lock_guard<std::mutex> Lock(AccessMutex); - DWOName = DIEBlder->updateDWONameCompDir( - *StrOffstsWriter, *StrWriter, *Unit, DwarfOutputPath, std::nullopt); - } - DebugStrOffsetsWriter DWOStrOffstsWriter(BC); - DebugStrWriter DWOStrWriter((*SplitCU)->getContext(), true); - DWODIEBuilder.updateDWONameCompDirForTypes(DWOStrOffstsWriter, - DWOStrWriter, **SplitCU, - DwarfOutputPath, DWOName); - DebugLoclistWriter DebugLocDWoWriter(*Unit, Unit->getVersion(), true, - *AddressWriter); - DebugRangesSectionWriter *TempRangesSectionWriter = RangesSectionWriter; - if (Unit->getVersion() >= 5) { - TempRangesSectionWriter = RangeListsWritersByCU[*DWOId].get(); - } else { - TempRangesSectionWriter = LegacyRangesWritersByCU[*DWOId].get(); - RangesBase = RangesSectionWriter->getSectionOffset(); - } - - updateUnitDebugInfo(*(*SplitCU), DWODIEBuilder, DebugLocDWoWriter, - *TempRangesSectionWriter, *AddressWriter); - DebugLocDWoWriter.finalize(DWODIEBuilder, - *DWODIEBuilder.getUnitDIEbyUnit(**SplitCU)); - if (Unit->getVersion() >= 5) - TempRangesSectionWriter->finalizeSection(); - - emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State, - DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter, - GDBIndexSection); - } - - if (Unit->getVersion() >= 5) { - RangesBase = RangesSectionWriter->getSectionOffset() + + DebugLocWriter &DebugLocWriter = + *LocListWritersByCU[LocListWritersIndexByCU[Unit.getOffset()]].get(); + DebugRangesSectionWriter &RangesSectionWriter = + Unit.getVersion() >= 5 ? 
*RangeListsSectionWriter.get() + : *LegacyRangesSectionWriter.get(); + DebugAddrWriter &AddressWriter = + *AddressWritersByCU[Unit.getOffset()].get(); + if (Unit.getVersion() >= 5) + RangeListsSectionWriter->setAddressWriter(&AddressWriter); + if (Unit.getVersion() >= 5) { + RangesBase = RangesSectionWriter.getSectionOffset() + getDWARF5RngListLocListHeaderSize(); - RangesSectionWriter->initSection(*Unit); - StrOffstsWriter->finalizeSection(*Unit, *DIEBlder); + RangesSectionWriter.initSection(Unit); + StrOffstsWriter->finalizeSection(Unit, DIEBlder); + } else if (SplitCU) { + RangesBase = LegacyRangesSectionWriter.get()->getSectionOffset(); } - updateUnitDebugInfo(*Unit, *DIEBlder, *DebugLocWriter, *RangesSectionWriter, - *AddressWriter, RangesBase); - DebugLocWriter->finalize(*DIEBlder, *DIEBlder->getUnitDIEbyUnit(*Unit)); - if (Unit->getVersion() >= 5) - RangesSectionWriter->finalizeSection(); + updateUnitDebugInfo(Unit, DIEBlder, DebugLocWriter, RangesSectionWriter, + AddressWriter, RangesBase); + DebugLocWriter.finalize(DIEBlder, *DIEBlder.getUnitDIEbyUnit(Unit)); + if (Unit.getVersion() >= 5) + RangesSectionWriter.finalizeSection(); }; DIEBuilder DIEBlder(BC, BC.DwCtx.get(), DebugNamesTable); @@ -751,8 +746,24 @@ void DWARFRewriter::updateDebugInfo() { CUPartitionVector PartVec = partitionCUs(*BC.DwCtx); for (std::vector<DWARFUnit *> &Vec : PartVec) { DIEBlder.buildCompileUnits(Vec); + for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) { + createRangeLocListAddressWriters(*CU); + std::optional<DWARFUnit *> SplitCU; + std::optional<uint64_t> DWOId = CU->getDWOId(); + if (DWOId) + SplitCU = BC.getDWOCU(*DWOId); + if (!SplitCU) + continue; + DebugAddrWriter &AddressWriter = + *AddressWritersByCU[CU->getOffset()].get(); + DebugRangesSectionWriter *TempRangesSectionWriter = + CU->getVersion() >= 5 ? 
RangeListsWritersByCU[*DWOId].get() + : LegacyRangesWritersByCU[*DWOId].get(); + processSplitCU(*CU, **SplitCU, DIEBlder, *TempRangesSectionWriter, + AddressWriter); + } for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) - processUnitDIE(CU, &DIEBlder); + processMainBinaryCU(*CU, DIEBlder); finalizeCompileUnits(DIEBlder, *Streamer, OffsetMap, DIEBlder.getProcessedCUs(), *FinalAddrWriter); } diff --git a/bolt/test/X86/dwarf5-dwarf4-types-backward-forward-cross-reference.test b/bolt/test/X86/dwarf5-dwarf4-types-backward-forward-cross-reference.test index 070648c..b48d6a5 100644 --- a/bolt/test/X86/dwarf5-dwarf4-types-backward-forward-cross-reference.test +++ b/bolt/test/X86/dwarf5-dwarf4-types-backward-forward-cross-reference.test @@ -5,10 +5,11 @@ # RUN: %clang %cflags %tmain.o %thelper.o -o %t.exe # RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections # RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt | FileCheck --check-prefix=POSTCHECK %s +# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt | FileCheck --check-prefix=POSTCHECKADDR %s # RUN: llvm-dwarfdump --show-form --verbose --debug-types %t.bolt | FileCheck --check-prefix=POSTCHECKTU %s ## This test checks that BOLT handles correctly backward and forward cross CU references -## for DWARF5 and DWARF4 with -fdebug-types-section +## for DWARF5 and DWARF4 with -fdebug-types-section and checks the address table is correct. 
# POSTCHECK: version = 0x0005 # POSTCHECK: DW_TAG_type_unit @@ -29,6 +30,15 @@ # POSTCHECK: DW_TAG_variable [20] # POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo3a") +# POSTCHECKADDR: Addrs: [ +# POSTCHECKADDR-NEXT: 0x0000000000001360 +# POSTCHECKADDR-NEXT: 0x0000000000000000 +# POSTCHECKADDR-NEXT: ] +# POSTCHECKADDR: Addrs: [ +# POSTCHECKADDR-NEXT: 0x00000000000013e0 +# POSTCHECKADDR-NEXT: 0x0000000000000000 +# POSTCHECKADDR-NEXT: ] + # POSTCHECKTU: version = 0x0004 # POSTCHECKTU: DW_TAG_type_unit # POSTCHECKTU: DW_TAG_structure_type diff --git a/bolt/test/X86/dwarf5-locexpr-referrence.test b/bolt/test/X86/dwarf5-locexpr-referrence.test index ea73d76..cc7bb27 100644 --- a/bolt/test/X86/dwarf5-locexpr-referrence.test +++ b/bolt/test/X86/dwarf5-locexpr-referrence.test @@ -5,8 +5,10 @@ # RUN: %clang %cflags -dwarf-5 %tmain.o %thelper.o -o %t.exe -Wl,-q # RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections # RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt | FileCheck --check-prefix=CHECK %s +# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt | FileCheck --check-prefix=CHECKADDR %s -## This test checks that we update relative DIE references with DW_OP_convert that are in locexpr. +## This test checks that we update relative DIE references with DW_OP_convert that are in locexpr +## and checks the address table is correct. 
# CHECK: version = 0x0005 # CHECK: DW_TAG_variable @@ -19,3 +21,18 @@ # CHECK-SAME: DW_OP_convert (0x00000028 -> 0x00000092) # CHECK-SAME: DW_OP_convert (0x0000002c -> 0x00000096) # CHECK: version = 0x0005 + +# CHECKADDR: Addrs: [ +# CHECKADDR-NEXT: 0x0000000000001330 +# CHECKADDR-NEXT: 0x0000000000000000 +# CHECKADDR-NEXT: 0x0000000000001333 +# CHECKADDR-NEXT: ] +# CHECKADDR: Addrs: [ +# CHECKADDR-NEXT: 0x0000000000001340 +# CHECKADDR-NEXT: 0x0000000000000000 +# CHECKADDR-NEXT: 0x0000000000001343 +# CHECKADDR-NEXT: ] +# CHECKADDR: Addrs: [ +# CHECKADDR-NEXT: 0x0000000000001320 +# CHECKADDR-NEXT: 0x0000000000000000 +# CHECKADDR-NEXT: ] diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 69269cf..fa36405 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3232,6 +3232,10 @@ def fimplicit_module_maps : Flag <["-"], "fimplicit-module-maps">, Group<f_Group Visibility<[ClangOption, CC1Option, CLOption]>, HelpText<"Implicitly search the file system for module map files.">, MarshallingInfoFlag<HeaderSearchOpts<"ImplicitModuleMaps">>; +defm modulemap_allow_subdirectory_search : BoolFOption <"modulemap-allow-subdirectory-search", + HeaderSearchOpts<"AllowModuleMapSubdirectorySearch">, DefaultTrue, + PosFlag<SetTrue, [], [], "Allow to search for module maps in subdirectories of search paths">, + NegFlag<SetFalse>, BothFlags<[NoXarchOption], [ClangOption, CC1Option]>>; defm modules : BoolFOption<"modules", LangOpts<"Modules">, Default<fcxx_modules.KeyPath>, PosFlag<SetTrue, [], [ClangOption, CC1Option], diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h index 1763514..83a95e9 100644 --- a/clang/include/clang/Lex/HeaderSearchOptions.h +++ b/clang/include/clang/Lex/HeaderSearchOptions.h @@ -270,6 +270,12 @@ public: LLVM_PREFERRED_TYPE(bool) unsigned ModulesIncludeVFSUsage : 1; + /// Whether we should look for a module in module maps 
only in provided + /// header search paths or if we are allowed to look for module maps in + /// subdirectories of provided paths too. + LLVM_PREFERRED_TYPE(bool) + unsigned AllowModuleMapSubdirectorySearch : 1; + HeaderSearchOptions(StringRef _Sysroot = "/") : Sysroot(_Sysroot), ModuleFormat("raw"), DisableModuleHash(false), ImplicitModuleMaps(false), ModuleMapFileHomeIsCwd(false), @@ -285,7 +291,8 @@ public: ModulesSkipHeaderSearchPaths(false), ModulesSkipPragmaDiagnosticMappings(false), ModulesPruneNonAffectingModuleMaps(true), ModulesHashContent(false), - ModulesStrictContextHash(false), ModulesIncludeVFSUsage(false) {} + ModulesStrictContextHash(false), ModulesIncludeVFSUsage(false), + AllowModuleMapSubdirectorySearch(true) {} /// AddPath - Add the \p Path path to the specified \p Group list. void AddPath(StringRef Path, frontend::IncludeDirGroup Group, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 78936fd..bc77b98 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3960,6 +3960,9 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D, options::OPT_fno_modules_strict_decluse, false)) CmdArgs.push_back("-fmodules-strict-decluse"); + Args.addOptOutFlag(CmdArgs, options::OPT_fmodulemap_allow_subdirectory_search, + options::OPT_fno_modulemap_allow_subdirectory_search); + // -fno-implicit-modules turns off implicitly compiling modules on demand. 
bool ImplicitModules = false; if (!Args.hasFlag(options::OPT_fimplicit_modules, diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 59453c4..61d12b1 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -609,6 +609,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString( "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ))); + if (Args.hasArg(options::OPT_cuda_path_EQ)) + CmdArgs.push_back(Args.MakeArgString( + "--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ))); + // Add paths specified in LIBRARY_PATH environment variable as -L options. addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index c6f9d7b..17b6074 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -3036,6 +3036,35 @@ void Darwin::addClangTargetOptions( if (!DriverArgs.hasArgNoClaim(options::OPT_fdefine_target_os_macros, options::OPT_fno_define_target_os_macros)) CC1Args.push_back("-fdefine-target-os-macros"); + + // Disable subdirectory modulemap search on sufficiently recent SDKs. 
+ if (SDKInfo && + !DriverArgs.hasFlag(options::OPT_fmodulemap_allow_subdirectory_search, + options::OPT_fno_modulemap_allow_subdirectory_search, + false)) { + bool RequiresSubdirectorySearch; + VersionTuple SDKVersion = SDKInfo->getVersion(); + switch (TargetPlatform) { + default: + RequiresSubdirectorySearch = true; + break; + case MacOS: + RequiresSubdirectorySearch = SDKVersion < VersionTuple(15, 0); + break; + case IPhoneOS: + case TvOS: + RequiresSubdirectorySearch = SDKVersion < VersionTuple(18, 0); + break; + case WatchOS: + RequiresSubdirectorySearch = SDKVersion < VersionTuple(11, 0); + break; + case XROS: + RequiresSubdirectorySearch = SDKVersion < VersionTuple(2, 0); + break; + } + if (!RequiresSubdirectorySearch) + CC1Args.push_back("-fno-modulemap-allow-subdirectory-search"); + } } void Darwin::addClangCC1ASTargetOptions( diff --git a/clang/lib/Headers/stdarg.h b/clang/lib/Headers/stdarg.h index 8292ab9..6203d7a 100644 --- a/clang/lib/Headers/stdarg.h +++ b/clang/lib/Headers/stdarg.h @@ -20,19 +20,18 @@ * modules. */ #if defined(__MVS__) && __has_include_next(<stdarg.h>) -#include <__stdarg_header_macro.h> #undef __need___va_list #undef __need_va_list #undef __need_va_arg #undef __need___va_copy #undef __need_va_copy +#include <__stdarg_header_macro.h> #include_next <stdarg.h> #else #if !defined(__need___va_list) && !defined(__need_va_list) && \ !defined(__need_va_arg) && !defined(__need___va_copy) && \ !defined(__need_va_copy) -#include <__stdarg_header_macro.h> #define __need___va_list #define __need_va_list #define __need_va_arg @@ -45,6 +44,7 @@ !defined(__STRICT_ANSI__) #define __need_va_copy #endif +#include <__stdarg_header_macro.h> #endif #ifdef __need___va_list diff --git a/clang/lib/Headers/stddef.h b/clang/lib/Headers/stddef.h index 8985c52..99b275a 100644 --- a/clang/lib/Headers/stddef.h +++ b/clang/lib/Headers/stddef.h @@ -20,7 +20,6 @@ * modules. 
*/ #if defined(__MVS__) && __has_include_next(<stddef.h>) -#include <__stddef_header_macro.h> #undef __need_ptrdiff_t #undef __need_size_t #undef __need_rsize_t @@ -31,6 +30,7 @@ #undef __need_max_align_t #undef __need_offsetof #undef __need_wint_t +#include <__stddef_header_macro.h> #include_next <stddef.h> #else @@ -40,7 +40,6 @@ !defined(__need_NULL) && !defined(__need_nullptr_t) && \ !defined(__need_unreachable) && !defined(__need_max_align_t) && \ !defined(__need_offsetof) && !defined(__need_wint_t) -#include <__stddef_header_macro.h> #define __need_ptrdiff_t #define __need_size_t /* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is @@ -49,7 +48,24 @@ #define __need_rsize_t #endif #define __need_wchar_t +#if !defined(__STDDEF_H) || __has_feature(modules) +/* + * __stddef_null.h is special when building without modules: if __need_NULL is + * set, then it will unconditionally redefine NULL. To avoid stepping on client + * definitions of NULL, __need_NULL should only be set the first time this + * header is included, that is when __STDDEF_H is not defined. However, when + * building with modules, this header is a textual header and needs to + * unconditionally include __stddef_null.h to support multiple submodules + * exporting _Builtin_stddef.null. Take module SM with submodules A and B, whose + * headers both include stddef.h. When SM.A builds, __STDDEF_H will be defined. + * When SM.B builds, the definition from SM.A will leak when building without + * local submodule visibility. stddef.h wouldn't include __stddef_null.h, and + * SM.B wouldn't import _Builtin_stddef.null, and SM.B's `export *` wouldn't + * export NULL as expected. When building with modules, always include + * __stddef_null.h so that everything works as expected. 
+ */ #define __need_NULL +#endif #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ defined(__cplusplus) #define __need_nullptr_t @@ -65,6 +81,7 @@ /* wint_t is provided by <wchar.h> and not <stddef.h>. It's here * for compatibility, but must be explicitly requested. Therefore * __need_wint_t is intentionally not defined here. */ +#include <__stddef_header_macro.h> #endif #if defined(__need_ptrdiff_t) diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 31a4c0f..088d1cc 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -914,8 +914,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { case pp_import: // Ignore missing filenames in include or import directives. if (lexIncludeFilename(First, End).is(tok::eod)) { - skipDirective(Id, First, End); - return true; + return false; } break; default: diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index c3b3064..d2210e7 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -378,20 +378,22 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, break; } - // If we've already performed the exhaustive search for module maps in this - // search directory, don't do it again. - if (Dir.haveSearchedAllModuleMaps()) - continue; + if (HSOpts->AllowModuleMapSubdirectorySearch) { + // If we've already performed the exhaustive search for module maps in + // this search directory, don't do it again. + if (Dir.haveSearchedAllModuleMaps()) + continue; - // Load all module maps in the immediate subdirectories of this search - // directory if ModuleName was from @import. - if (AllowExtraModuleMapSearch) - loadSubdirectoryModuleMaps(Dir); + // Load all module maps in the immediate subdirectories of this search + // directory if ModuleName was from @import. 
+ if (AllowExtraModuleMapSearch) + loadSubdirectoryModuleMaps(Dir); - // Look again for the module. - Module = ModMap.findModule(ModuleName); - if (Module) - break; + // Look again for the module. + Module = ModMap.findModule(ModuleName); + if (Module) + break; + } } return Module; diff --git a/clang/test/CodeGenCXX/ptrauth-static-destructors.cpp b/clang/test/CodeGenCXX/ptrauth-static-destructors.cpp index 1240f26..634450b 100644 --- a/clang/test/CodeGenCXX/ptrauth-static-destructors.cpp +++ b/clang/test/CodeGenCXX/ptrauth-static-destructors.cpp @@ -2,13 +2,27 @@ // RUN: | FileCheck %s --check-prefix=CXAATEXIT // RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm -std=c++11 %s -o - \ -// RUN: -fno-use-cxa-atexit | FileCheck %s --check-prefixes=ATEXIT,DARWIN +// RUN: -fno-use-cxa-atexit | FileCheck %s --check-prefixes=ATEXIT,ATEXIT_DARWIN // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm -std=c++11 %s -o - \ // RUN: | FileCheck %s --check-prefix=CXAATEXIT // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm -std=c++11 %s -o - \ -// RUN: -fno-use-cxa-atexit | FileCheck %s --check-prefixes=ATEXIT,ELF +// RUN: -fno-use-cxa-atexit | FileCheck %s --check-prefixes=ATEXIT,ATEXIT_ELF + +// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm -std=c++11 %s \ +// RUN: -fptrauth-function-pointer-type-discrimination -o - | FileCheck %s --check-prefix=CXAATEXIT_DISC + +// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm -std=c++11 %s -o - \ +// RUN: -fptrauth-function-pointer-type-discrimination -fno-use-cxa-atexit \ +// RUN: | FileCheck %s --check-prefixes=ATEXIT_DISC,ATEXIT_DISC_DARWIN + +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm -std=c++11 %s \ +// RUN: -fptrauth-function-pointer-type-discrimination -o - | FileCheck %s --check-prefix=CXAATEXIT_DISC + +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm -std=c++11 %s -o - \ +// RUN: 
-fptrauth-function-pointer-type-discrimination -fno-use-cxa-atexit \ +// RUN: | FileCheck %s --check-prefixes=ATEXIT_DISC,ATEXIT_DISC_ELF class Foo { public: @@ -21,11 +35,22 @@ Foo global; // CXAATEXIT: define internal void @__cxx_global_var_init() // CXAATEXIT: call i32 @__cxa_atexit(ptr ptrauth (ptr @_ZN3FooD1Ev, i32 0), ptr @global, ptr @__dso_handle) +// CXAATEXIT_DISC: define internal void @__cxx_global_var_init() +// CXAATEXIT_DISC: call i32 @__cxa_atexit(ptr ptrauth (ptr @_ZN3FooD1Ev, i32 0, i64 10942), ptr @global, ptr @__dso_handle) // ATEXIT: define internal void @__cxx_global_var_init() // ATEXIT: %{{.*}} = call i32 @atexit(ptr ptrauth (ptr @__dtor_global, i32 0)) -// DARWIN: define internal void @__dtor_global() {{.*}} section "__TEXT,__StaticInit,regular,pure_instructions" { -// ELF: define internal void @__dtor_global() {{.*}} section ".text.startup" { -// DARWIN: %{{.*}} = call ptr @_ZN3FooD1Ev(ptr @global) -// ELF: call void @_ZN3FooD1Ev(ptr @global) +// ATEXIT_DARWIN: define internal void @__dtor_global() {{.*}} section "__TEXT,__StaticInit,regular,pure_instructions" { +// ATEXIT_ELF: define internal void @__dtor_global() {{.*}} section ".text.startup" { +// ATEXIT_DARWIN: %{{.*}} = call ptr @_ZN3FooD1Ev(ptr @global) +// ATEXIT_ELF: call void @_ZN3FooD1Ev(ptr @global) + +// ATEXIT_DISC: define internal void @__cxx_global_var_init() +// ATEXIT_DISC: %{{.*}} = call i32 @atexit(ptr ptrauth (ptr @__dtor_global, i32 0, i64 10942)) + + +// ATEXIT_DISC_DARWIN: define internal void @__dtor_global() {{.*}} section "__TEXT,__StaticInit,regular,pure_instructions" { +// ATEXIT_DISC_ELF: define internal void @__dtor_global() {{.*}} section ".text.startup" { +// ATEXIT_DISC_DARWIN: %{{.*}} = call ptr @_ZN3FooD1Ev(ptr @global) +// ATEXIT_DISC_ELF: call void @_ZN3FooD1Ev(ptr @global) diff --git a/clang/test/Driver/linker-wrapper-passes.c b/clang/test/Driver/linker-wrapper-passes.c index aadcf47..fb63ef7 100644 --- a/clang/test/Driver/linker-wrapper-passes.c +++ 
b/clang/test/Driver/linker-wrapper-passes.c @@ -4,6 +4,9 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target +// https://github.com/llvm/llvm-project/issues/100212 +// XFAIL: * + // Setup. // RUN: mkdir -p %t // RUN: %clang -cc1 -emit-llvm-bc -o %t/host-x86_64-unknown-linux-gnu.bc \ @@ -13,7 +16,7 @@ // RUN: opt %t/openmp-amdgcn-amd-amdhsa.bc -o %t/openmp-amdgcn-amd-amdhsa.bc \ // RUN: -passes=forceattrs -force-remove-attribute=f:noinline // RUN: clang-offload-packager -o %t/openmp-x86_64-unknown-linux-gnu.out \ -// RUN: --image=file=%t/openmp-amdgcn-amd-amdhsa.bc,triple=amdgcn-amd-amdhsa +// RUN: --image=file=%t/openmp-amdgcn-amd-amdhsa.bc,arch=gfx90a,triple=amdgcn-amd-amdhsa // RUN: %clang -cc1 -S -o %t/host-x86_64-unknown-linux-gnu.s \ // RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ // RUN: -fembed-offload-object=%t/openmp-x86_64-unknown-linux-gnu.out \ diff --git a/clang/test/Driver/modulemap-allow-subdirectory-search.c b/clang/test/Driver/modulemap-allow-subdirectory-search.c new file mode 100644 index 0000000..ee993a7 --- /dev/null +++ b/clang/test/Driver/modulemap-allow-subdirectory-search.c @@ -0,0 +1,27 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// Check that with a sufficiently new SDK we do not search for module maps in subdirectories. + +// New SDK. +// RUN: %clang -target x86_64-apple-macos10.13 -isysroot %t/MacOSX15.0.sdk -fmodules %t/test.c -### 2>&1 \ +// RUN: | FileCheck --check-prefix=NO-SUBDIRECTORIES %t/test.c +// Old SDK. +// RUN: %clang -target x86_64-apple-macos10.13 -isysroot %t/MacOSX14.0.sdk -fmodules %t/test.c -### 2>&1 \ +// RUN: | FileCheck --check-prefix=SEARCH-SUBDIRECTORIES %t/test.c +// Non-Darwin platform. +// RUN: %clang -target i386-unknown-linux -isysroot %t/MacOSX15.0.sdk -fmodules %t/test.c -### 2>&1 \ +// RUN: | FileCheck --check-prefix=SEARCH-SUBDIRECTORIES %t/test.c +// New SDK overriding the default. 
+// RUN: %clang -target x86_64-apple-macos10.13 -isysroot %t/MacOSX15.0.sdk -fmodules %t/test.c -fmodulemap-allow-subdirectory-search -### 2>&1 \ +// RUN: | FileCheck --check-prefix=SEARCH-SUBDIRECTORIES %t/test.c + +//--- test.c +// NO-SUBDIRECTORIES: "-fno-modulemap-allow-subdirectory-search" +// SEARCH-SUBDIRECTORIES-NOT: "-fno-modulemap-allow-subdirectory-search" + +//--- MacOSX15.0.sdk/SDKSettings.json +{"Version":"15.0", "MaximumDeploymentTarget": "15.0.99"} + +//--- MacOSX14.0.sdk/SDKSettings.json +{"Version":"14.0", "MaximumDeploymentTarget": "14.0.99"} diff --git a/clang/test/Driver/nvlink-wrapper.c b/clang/test/Driver/nvlink-wrapper.c index fdda93f..318315d 100644 --- a/clang/test/Driver/nvlink-wrapper.c +++ b/clang/test/Driver/nvlink-wrapper.c @@ -63,3 +63,10 @@ int baz() { return y + x; } // RUN: -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LTO // LTO: ptxas{{.*}} -m64 -c [[PTX:.+]].s -O3 -arch sm_52 -o [[CUBIN:.+]].cubin // LTO: nvlink{{.*}} -arch sm_52 -o a.out [[CUBIN]].cubin {{.*}}-u-{{.*}}.cubin {{.*}}-y-{{.*}}.cubin + +// +// Check that we don't forward some arguments. +// +// RUN: clang-nvlink-wrapper --dry-run %t.o %t-u.o %t-y.a \ +// RUN: -arch sm_52 --cuda-path/opt/cuda -o a.out 2>&1 | FileCheck %s --check-prefix=PATH +// PATH-NOT: --cuda-path=/opt/cuda diff --git a/clang/test/Headers/stddefneeds.cpp b/clang/test/Headers/stddefneeds.cpp index 0763bbd..0282e8a 100644 --- a/clang/test/Headers/stddefneeds.cpp +++ b/clang/test/Headers/stddefneeds.cpp @@ -56,14 +56,21 @@ max_align_t m5; #undef NULL #define NULL 0 -// glibc (and other) headers then define __need_NULL and rely on stddef.h -// to redefine NULL to the correct value again. -#define __need_NULL +// Including stddef.h again shouldn't redefine NULL #include <stddef.h> // gtk headers then use __attribute__((sentinel)), which doesn't work if NULL // is 0. -void f(const char* c, ...) __attribute__((sentinel)); +void f(const char* c, ...) 
__attribute__((sentinel)); // expected-note{{function has been explicitly marked sentinel here}} void g() { + f("", NULL); // expected-warning{{missing sentinel in function call}} +} + +// glibc (and other) headers then define __need_NULL and rely on stddef.h +// to redefine NULL to the correct value again. +#define __need_NULL +#include <stddef.h> + +void h() { f("", NULL); // Shouldn't warn. } diff --git a/clang/test/Modules/modulemap-allow-subdirectory-search.m b/clang/test/Modules/modulemap-allow-subdirectory-search.m new file mode 100644 index 0000000..ef6f9b1 --- /dev/null +++ b/clang/test/Modules/modulemap-allow-subdirectory-search.m @@ -0,0 +1,18 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -fsyntax-only -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules.cache -I %t/include %t/test.m +// RUN: %clang_cc1 -fsyntax-only -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules.cache -I %t/include %t/test.m -fmodulemap-allow-subdirectory-search +// RUN: not %clang_cc1 -fsyntax-only -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules.cache -I %t/include %t/test.m -fno-modulemap-allow-subdirectory-search + +//--- include/UnrelatedName/Header.h +// empty + +//--- include/UnrelatedName/module.modulemap +module UsefulCode { + header "Header.h" + export * +} + +//--- test.m +@import UsefulCode; diff --git a/clang/test/Modules/stddef.cpp b/clang/test/Modules/stddef.cpp new file mode 100644 index 0000000..c53bfa3 --- /dev/null +++ b/clang/test/Modules/stddef.cpp @@ -0,0 +1,73 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/no-lsv -I%t %t/stddef.cpp -verify +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-local-submodule-visibility -fmodules-cache-path=%t/lsv -I%t %t/stddef.cpp -verify + +//--- stddef.cpp +#include <b.h> + +void *pointer = NULL; +size_t size = 0; + +// When building with modules, a pcm is never 
re-imported, so re-including +// stddef.h will not re-import _Builtin_stddef.null to restore the definition of +// NULL, even though stddef.h will unconditionally include __stddef_null.h when +// building with modules. +#undef NULL +#include <stddef.h> + +void *anotherPointer = NULL; // expected-error{{use of undeclared identifier 'NULL'}} + +// stddef.h needs to be a `textual` header to support clients doing things like +// this. +// +// #define __need_NULL +// #include <stddef.h> +// +// As a textual header designed to be included multiple times, it can't directly +// declare anything, or those declarations would go into every module that +// included it. e.g. if stddef.h contained all of its declarations, and modules +// A and B included stddef.h, they would both have the declaration for size_t. +// That breaks Swift, which uses the module name as part of the type name, i.e. +// A.size_t and B.size_t are treated as completely different types in Swift and +// cannot be interchanged. To fix that, stddef.h (and stdarg.h) are split out +// into a separate file per __need macro that can be normal headers in explicit +// submodules. That runs into yet another wrinkle though. When modules build, +// declarations from previous submodules leak into subsequent ones when not +// using local submodule visibility. Consider if stddef.h did the normal thing. +// +// #ifndef __STDDEF_H +// #define __STDDEF_H +// // include all of the sub-headers +// #endif +// +// When SM builds without local submodule visibility, it will precompile a.h +// first. When it gets to b.h, the __STDDEF_H declaration from precompiling a.h +// will leak, and so when b.h includes stddef.h, it won't include any of its +// sub-headers, and SM.B will thus not import _Builtin_stddef or make any of its +// submodules visible. Precompiling b.h will be fine since it sees all of the +// declarations from a.h including stddef.h, but clients that only include b.h +// will not see any of the stddef.h types. 
stddef.h thus has to make sure to +// always include the necessary sub-headers, even if they've been included +// already. They all have their own header guards to allow this. +// __stddef_null.h is extra special, so this test makes sure to cover NULL plus +// one of the normal stddef.h types. + +//--- module.modulemap +module SM { + module A { + header "a.h" + export * + } + + module B { + header "b.h" + export * + } +} + +//--- a.h +#include <stddef.h> + +//--- b.h +#include <stddef.h> diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td index e84b530..8c80a51 100644 --- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td +++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td @@ -12,9 +12,9 @@ def verbose : Flag<["-"], "v">, HelpText<"Print verbose information">; def version : Flag<["--"], "version">, HelpText<"Display the version number and exit">; -def cuda_path_EQ : Joined<["--"], "cuda-path=">, +def cuda_path_EQ : Joined<["--"], "cuda-path=">, Flags<[WrapperOnlyOption]>, MetaVarName<"<dir>">, HelpText<"Set the system CUDA path">; -def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, +def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Flags<[WrapperOnlyOption]>, MetaVarName<"<dir>">, HelpText<"Set the 'ptxas' path">; def o : JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index 513e184..bdb5e23 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -653,12 +653,28 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIncludesAndImports) { SmallVector<char, 128> Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#import\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#include\n", Out)); - 
ASSERT_TRUE(minimizeSourceToDependencyDirectives("#ifdef A\n" - "#import \n" - "#endif\n", - Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#import\n", Out)); + EXPECT_STREQ("<TokBeforeEOF>\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include\n", Out)); + EXPECT_STREQ("<TokBeforeEOF>\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n" + "#import \n" + "#endif\n", + Out)); + // The ifdef block is removed because it's "empty". + EXPECT_STREQ("<TokBeforeEOF>\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n" + "#import \n" + "#define B\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifdef A\n" + "#define B\n" + "#endif\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) { diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 9a8e53b..d850574 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -19,6 +19,7 @@ #include "stats.h" #include "string_utils.h" #include "thread_annotations.h" +#include "vector.h" namespace scudo { @@ -73,12 +74,18 @@ static inline void unmap(LargeBlock::Header *H) { } namespace { + struct CachedBlock { + static constexpr u16 CacheIndexMax = UINT16_MAX; + static constexpr u16 InvalidEntry = CacheIndexMax; + uptr CommitBase = 0; uptr CommitSize = 0; uptr BlockBegin = 0; MemMapT MemMap = {}; u64 Time = 0; + u16 Next = 0; + u16 Prev = 0; bool isValid() { return CommitBase != 0; } @@ -188,10 +195,11 @@ public: Str->append("Stats: CacheRetrievalStats: SuccessRate: %u/%u " "(%zu.%02zu%%)\n", SuccessfulRetrieves, CallsToRetrieve, Integral, Fractional); - for (CachedBlock Entry : Entries) { - if (!Entry.isValid()) - continue; - Str->append("StartBlockAddress: 0x%zx, EndBlockAddress: 0x%zx, " + Str->append("Cache Entry Info (Most Recent -> Least Recent):\n"); + + for (u32 I = LRUHead; I != 
CachedBlock::InvalidEntry; I = Entries[I].Next) { + CachedBlock &Entry = Entries[I]; + Str->append(" StartBlockAddress: 0x%zx, EndBlockAddress: 0x%zx, " "BlockSize: %zu %s\n", Entry.CommitBase, Entry.CommitBase + Entry.CommitSize, Entry.CommitSize, Entry.Time == 0 ? "[R]" : ""); @@ -202,6 +210,10 @@ public: static_assert(Config::getDefaultMaxEntriesCount() <= Config::getEntriesArraySize(), ""); + // Ensure the cache entry array size fits in the LRU list Next and Prev + // index fields + static_assert(Config::getEntriesArraySize() <= CachedBlock::CacheIndexMax, + "Cache entry array is too large to be indexed."); void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(EntriesCount, 0U); @@ -213,23 +225,33 @@ public: if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); + + // The cache is initially empty + LRUHead = CachedBlock::InvalidEntry; + LRUTail = CachedBlock::InvalidEntry; + + // Available entries will be retrieved starting from the beginning of the + // Entries array + AvailableHead = 0; + for (u32 I = 0; I < Config::getEntriesArraySize() - 1; I++) + Entries[I].Next = static_cast<u16>(I + 1); + + Entries[Config::getEntriesArraySize() - 1].Next = CachedBlock::InvalidEntry; } void store(const Options &Options, LargeBlock::Header *H) EXCLUDES(Mutex) { if (!canCache(H->CommitSize)) return unmap(H); - bool EntryCached = false; - bool EmptyCache = false; const s32 Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs); - const u64 Time = getMonotonicTimeFast(); - const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); + u64 Time; CachedBlock Entry; + Entry.CommitBase = H->CommitBase; Entry.CommitSize = H->CommitSize; Entry.BlockBegin = reinterpret_cast<uptr>(H + 1); Entry.MemMap = H->MemMap; - Entry.Time = Time; + Entry.Time = UINT64_MAX; if (useMemoryTagging<Config>(Options)) { if (Interval == 0 && 
!SCUDO_FUCHSIA) { // Release the memory and make it inaccessible at the same time by @@ -243,17 +265,32 @@ public: Entry.MemMap.setMemoryPermission(Entry.CommitBase, Entry.CommitSize, MAP_NOACCESS); } - } else if (Interval == 0) { - Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, Entry.CommitSize); - Entry.Time = 0; } + + // Usually only one entry will be evicted from the cache. + // Only in the rare event that the cache shrinks in real-time + // due to a decrease in the configurable value MaxEntriesCount + // will more than one cache entry be evicted. + // The vector is used to save the MemMaps of evicted entries so + // that the unmap call can be performed outside the lock + Vector<MemMapT, 1U> EvictionMemMaps; + do { ScopedLock L(Mutex); + + // Time must be computed under the lock to ensure + // that the LRU cache remains sorted with respect to + // time in a multithreaded environment + Time = getMonotonicTimeFast(); + if (Entry.Time != 0) + Entry.Time = Time; + if (useMemoryTagging<Config>(Options) && QuarantinePos == -1U) { // If we get here then memory tagging was disabled in between when we // read Options and when we locked Mutex. We can't insert our entry into // the quarantine or the cache because the permissions would be wrong so // just unmap it. 
+ Entry.MemMap.unmap(Entry.MemMap.getBase(), Entry.MemMap.getCapacity()); break; } if (Config::getQuarantineSize() && useMemoryTagging<Config>(Options)) { @@ -269,36 +306,32 @@ public: OldestTime = Entry.Time; Entry = PrevEntry; } - if (EntriesCount >= MaxCount) { - if (IsFullEvents++ == 4U) - EmptyCache = true; - } else { - for (u32 I = 0; I < MaxCount; I++) { - if (Entries[I].isValid()) - continue; - if (I != 0) - Entries[I] = Entries[0]; - Entries[0] = Entry; - EntriesCount++; - if (OldestTime == 0) - OldestTime = Entry.Time; - EntryCached = true; - break; - } + + // All excess entries are evicted from the cache + while (needToEvict()) { + // Save MemMaps of evicted entries to perform unmap outside of lock + EvictionMemMaps.push_back(Entries[LRUTail].MemMap); + remove(LRUTail); } + + insert(Entry); + + if (OldestTime == 0) + OldestTime = Entry.Time; } while (0); - if (EmptyCache) - empty(); - else if (Interval >= 0) + + for (MemMapT &EvictMemMap : EvictionMemMaps) + EvictMemMap.unmap(EvictMemMap.getBase(), EvictMemMap.getCapacity()); + + if (Interval >= 0) { + // TODO: Add ReleaseToOS logic to LRU algorithm releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000); - if (!EntryCached) - Entry.MemMap.unmap(Entry.MemMap.getBase(), Entry.MemMap.getCapacity()); + } } bool retrieve(Options Options, uptr Size, uptr Alignment, uptr HeadersSize, LargeBlock::Header **H, bool *Zeroed) EXCLUDES(Mutex) { const uptr PageSize = getPageSizeCached(); - const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); // 10% of the requested size proved to be the optimal choice for // retrieving cached blocks after testing several options. 
constexpr u32 FragmentedBytesDivisor = 10; @@ -312,9 +345,8 @@ public: return false; u32 OptimalFitIndex = 0; uptr MinDiff = UINTPTR_MAX; - for (u32 I = 0; I < MaxCount; I++) { - if (!Entries[I].isValid()) - continue; + for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; + I = Entries[I].Next) { const uptr CommitBase = Entries[I].CommitBase; const uptr CommitSize = Entries[I].CommitSize; const uptr AllocPos = @@ -347,8 +379,7 @@ public: } if (Found) { Entry = Entries[OptimalFitIndex]; - Entries[OptimalFitIndex].invalidate(); - EntriesCount--; + remove(OptimalFitIndex); SuccessfulRetrieves++; } } @@ -417,12 +448,9 @@ public: Quarantine[I].invalidate(); } } - const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); - for (u32 I = 0; I < MaxCount; I++) { - if (Entries[I].isValid()) { - Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase, - Entries[I].CommitSize, 0); - } + for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; I = Entries[I].Next) { + Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase, + Entries[I].CommitSize, 0); } QuarantinePos = -1U; } @@ -434,6 +462,66 @@ public: void unmapTestOnly() { empty(); } private: + bool needToEvict() REQUIRES(Mutex) { + return (EntriesCount >= atomic_load_relaxed(&MaxEntriesCount)); + } + + void insert(const CachedBlock &Entry) REQUIRES(Mutex) { + DCHECK_LT(EntriesCount, atomic_load_relaxed(&MaxEntriesCount)); + + // Cache should be populated with valid entries when not empty + DCHECK_NE(AvailableHead, CachedBlock::InvalidEntry); + + u32 FreeIndex = AvailableHead; + AvailableHead = Entries[AvailableHead].Next; + + if (EntriesCount == 0) { + LRUTail = static_cast<u16>(FreeIndex); + } else { + // Check list order + if (EntriesCount > 1) + DCHECK_GE(Entries[LRUHead].Time, Entries[Entries[LRUHead].Next].Time); + Entries[LRUHead].Prev = static_cast<u16>(FreeIndex); + } + + Entries[FreeIndex] = Entry; + Entries[FreeIndex].Next = LRUHead; + Entries[FreeIndex].Prev = CachedBlock::InvalidEntry; + LRUHead = 
static_cast<u16>(FreeIndex); + EntriesCount++; + + // Availability stack should not have available entries when all entries + // are in use + if (EntriesCount == Config::getEntriesArraySize()) + DCHECK_EQ(AvailableHead, CachedBlock::InvalidEntry); + } + + void remove(uptr I) REQUIRES(Mutex) { + DCHECK(Entries[I].isValid()); + + Entries[I].invalidate(); + + if (I == LRUHead) + LRUHead = Entries[I].Next; + else + Entries[Entries[I].Prev].Next = Entries[I].Next; + + if (I == LRUTail) + LRUTail = Entries[I].Prev; + else + Entries[Entries[I].Next].Prev = Entries[I].Prev; + + Entries[I].Next = AvailableHead; + AvailableHead = static_cast<u16>(I); + EntriesCount--; + + // Cache should not have valid entries when not empty + if (EntriesCount == 0) { + DCHECK_EQ(LRUHead, CachedBlock::InvalidEntry); + DCHECK_EQ(LRUTail, CachedBlock::InvalidEntry); + } + } + void empty() { MemMapT MapInfo[Config::getEntriesArraySize()]; uptr N = 0; @@ -443,11 +531,10 @@ private: if (!Entries[I].isValid()) continue; MapInfo[N] = Entries[I].MemMap; - Entries[I].invalidate(); + remove(I); N++; } EntriesCount = 0; - IsFullEvents = 0; } for (uptr I = 0; I < N; I++) { MemMapT &MemMap = MapInfo[I]; @@ -484,7 +571,6 @@ private: atomic_u32 MaxEntriesCount = {}; atomic_uptr MaxEntrySize = {}; u64 OldestTime GUARDED_BY(Mutex) = 0; - u32 IsFullEvents GUARDED_BY(Mutex) = 0; atomic_s32 ReleaseToOsIntervalMs = {}; u32 CallsToRetrieve GUARDED_BY(Mutex) = 0; u32 SuccessfulRetrieves GUARDED_BY(Mutex) = 0; @@ -492,6 +578,13 @@ private: CachedBlock Entries[Config::getEntriesArraySize()] GUARDED_BY(Mutex) = {}; NonZeroLengthArray<CachedBlock, Config::getQuarantineSize()> Quarantine GUARDED_BY(Mutex) = {}; + + // The LRUHead of the cache is the most recently used cache entry + u16 LRUHead GUARDED_BY(Mutex) = 0; + // The LRUTail of the cache is the least recently used cache entry + u16 LRUTail GUARDED_BY(Mutex) = 0; + // The AvailableHead is the top of the stack of available entries + u16 AvailableHead 
GUARDED_BY(Mutex) = 0; }; template <typename Config> class MapAllocator { diff --git a/libc/cmake/modules/LibcConfig.cmake b/libc/cmake/modules/LibcConfig.cmake index 7a3e606..da166dd 100644 --- a/libc/cmake/modules/LibcConfig.cmake +++ b/libc/cmake/modules/LibcConfig.cmake @@ -113,7 +113,7 @@ function(load_libc_config config_file) message(FATAL_ERROR ${json_error}) endif() if(NOT DEFINED ${opt_name}) - message(FATAL_ERROR: " Option ${opt_name} defined in ${config_file} is invalid.") + message(FATAL_ERROR " Option ${opt_name} defined in ${config_file} is invalid.") endif() if(ARGN) list(FIND ARGN ${opt_name} optname_exists) diff --git a/libc/config/config.json b/libc/config/config.json index 2005f42..2bf432e 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -1,8 +1,8 @@ { "errno": { "LIBC_CONF_ERRNO_MODE": { - "value": "", - "doc": "The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM." + "value": "LIBC_ERRNO_MODE_DEFAULT", + "doc": "The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM." } }, "printf": { diff --git a/libc/src/errno/libc_errno.cpp b/libc/src/errno/libc_errno.cpp index 7a17a5a..d1600d1 100644 --- a/libc/src/errno/libc_errno.cpp +++ b/libc/src/errno/libc_errno.cpp @@ -9,6 +9,8 @@ #include "libc_errno.h" #include "src/__support/macros/config.h" +// libc uses a fallback default value, either system or thread local. +#define LIBC_ERRNO_MODE_DEFAULT 0 // libc never stores a value; `errno` macro uses get link-time failure. #define LIBC_ERRNO_MODE_UNDEFINED 1 // libc maintains per-thread state (requires C++ `thread_local` support). @@ -23,7 +25,8 @@ // fullbuild mode, effectively the same as `LIBC_ERRNO_MODE_EXTERNAL`. 
#define LIBC_ERRNO_MODE_SYSTEM 5 -#ifndef LIBC_ERRNO_MODE +#if !defined(LIBC_ERRNO_MODE) || LIBC_ERRNO_MODE == LIBC_ERRNO_MODE_DEFAULT +#undef LIBC_ERRNO_MODE #if defined(LIBC_FULL_BUILD) || !defined(LIBC_COPT_PUBLIC_PACKAGING) #define LIBC_ERRNO_MODE LIBC_ERRNO_MODE_THREAD_LOCAL #else @@ -31,12 +34,14 @@ #endif #endif // LIBC_ERRNO_MODE -#if LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_UNDEFINED && \ +#if LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_DEFAULT && \ + LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_UNDEFINED && \ LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_THREAD_LOCAL && \ LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_SHARED && \ LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_EXTERNAL && \ LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_SYSTEM #error LIBC_ERRNO_MODE must be one of the following values: \ +LIBC_ERRNO_MODE_DEFAULT, \ LIBC_ERRNO_MODE_UNDEFINED, \ LIBC_ERRNO_MODE_THREAD_LOCAL, \ LIBC_ERRNO_MODE_SHARED, \ diff --git a/libc/src/setjmp/riscv/longjmp.cpp b/libc/src/setjmp/riscv/longjmp.cpp index b14f636..0f9537c 100644 --- a/libc/src/setjmp/riscv/longjmp.cpp +++ b/libc/src/setjmp/riscv/longjmp.cpp @@ -30,7 +30,6 @@ namespace LIBC_NAMESPACE_DECL { -[[gnu::naked]] LLVM_LIBC_FUNCTION(void, longjmp, (__jmp_buf * buf, int val)) { LOAD(ra, buf->__pc); LOAD(s0, buf->__regs[0]); diff --git a/libc/src/setjmp/riscv/setjmp.cpp b/libc/src/setjmp/riscv/setjmp.cpp index 92982cc..12def57 100644 --- a/libc/src/setjmp/riscv/setjmp.cpp +++ b/libc/src/setjmp/riscv/setjmp.cpp @@ -29,7 +29,6 @@ namespace LIBC_NAMESPACE_DECL { -[[gnu::naked]] LLVM_LIBC_FUNCTION(int, setjmp, (__jmp_buf * buf)) { STORE(ra, buf->__pc); STORE(s0, buf->__regs[0]); diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 72dbf0b..2a03ee9 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -448,8 +448,11 @@ public: } _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view substr(size_type __pos = 0, size_type __n = npos) const { + // Use the `__assume_valid` form of the constructor to avoid an unnecessary check. 
Any substring of a view is a + // valid view. In particular, `size()` is known to be smaller than `numeric_limits<difference_type>::max()`, so the + // new size is also smaller. See also https://github.com/llvm/llvm-project/issues/91634. return __pos > size() ? (__throw_out_of_range("string_view::substr"), basic_string_view()) - : basic_string_view(data() + __pos, std::min(__n, size() - __pos)); + : basic_string_view(__assume_valid(), data() + __pos, std::min(__n, size() - __pos)); } _LIBCPP_CONSTEXPR_SINCE_CXX14 int compare(basic_string_view __sv) const _NOEXCEPT { @@ -674,6 +677,16 @@ public: #endif private: + struct __assume_valid {}; + + // This is the same as the pointer and length constructor, but without the additional hardening checks. It is intended + // for use within the class, when the class invariants already guarantee the resulting object is valid. The compiler + // usually cannot eliminate the redundant checks because it does not know class invariants. + _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI + basic_string_view(__assume_valid, const _CharT* __s, size_type __len) _NOEXCEPT + : __data_(__s), + __size_(__len) {} + const value_type* __data_; size_type __size_; }; diff --git a/libcxx/test/libcxx/fuzzing/random.pass.cpp b/libcxx/test/libcxx/fuzzing/random.pass.cpp index 6639776..af80fb8 100644 --- a/libcxx/test/libcxx/fuzzing/random.pass.cpp +++ b/libcxx/test/libcxx/fuzzing/random.pass.cpp @@ -8,7 +8,7 @@ // This test fails because Clang no longer enables -fdelayed-template-parsing // by default on Windows with C++20 (#69431). 
-// XFAIL: msvc && (clang-18 || clang-19) +// XFAIL: msvc && (clang-18 || clang-19 || clang-20) // UNSUPPORTED: c++03, c++11 diff --git a/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp index bbfb0c554..0f47a51 100644 --- a/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp @@ -8,7 +8,7 @@ // This test fails because Clang no longer enables -fdelayed-template-parsing // by default on Windows with C++20 (#69431). -// XFAIL: msvc && (clang-18 || clang-19) +// XFAIL: msvc && (clang-18 || clang-19 || clang-20) // <math.h> diff --git a/libcxx/test/std/numerics/c.math/cmath.pass.cpp b/libcxx/test/std/numerics/c.math/cmath.pass.cpp index 19b5fd0..6028aa5 100644 --- a/libcxx/test/std/numerics/c.math/cmath.pass.cpp +++ b/libcxx/test/std/numerics/c.math/cmath.pass.cpp @@ -8,7 +8,7 @@ // This test fails because Clang no longer enables -fdelayed-template-parsing // by default on Windows with C++20 (#69431). -// XFAIL: msvc && (clang-18 || clang-19) +// XFAIL: msvc && (clang-18 || clang-19 || clang-20) // <cmath> diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile index 9e1865e..490bee4 100644 --- a/libcxx/utils/ci/Dockerfile +++ b/libcxx/utils/ci/Dockerfile @@ -106,6 +106,7 @@ RUN sudo apt-get update \ #RUN apt-get update && apt-get install -y ninja-build python3 python3-distutils python3-psutil git gdb ccache # TODO add ninja-build once 1.11 is available in Ubuntu, also remove the manual installation. 
RUN <<EOF + set -e wget -qO /tmp/ninja.gz https://github.com/ninja-build/ninja/releases/latest/download/ninja-linux.zip gunzip /tmp/ninja.gz chmod a+x /tmp/ninja @@ -115,6 +116,7 @@ EOF # These two locales are not enabled by default so generate them RUN <<EOF + set -e printf "fr_CA ISO-8859-1\ncs_CZ ISO-8859-2" | sudo tee -a /etc/locale.gen sudo mkdir /usr/local/share/i1en/ printf "fr_CA ISO-8859-1\ncs_CZ ISO-8859-2" | sudo tee -a /usr/local/share/i1en/SUPPORTED @@ -129,6 +131,7 @@ EOF # 14 release branch CI uses it. The tip-of-trunk CI will never use Clang 12, # though. RUN <<EOF + set -e sudo apt-get update wget https://apt.llvm.org/llvm.sh -O /tmp/llvm.sh chmod +x /tmp/llvm.sh @@ -142,6 +145,7 @@ EOF # Install the most recent GCC, like clang install the previous version as a transition. RUN <<EOF + set -e sudo git clone https://github.com/compiler-explorer/infra.git /tmp/ce-infra (cd /tmp/ce-infra && sudo make ce) sudo /tmp/ce-infra/bin/ce_install install compilers/c++/x86/gcc $GCC_LATEST_VERSION.1.0 @@ -155,13 +159,14 @@ EOF RUN <<EOF # Install a recent CMake + set -e wget https://github.com/Kitware/CMake/releases/download/v3.21.1/cmake-3.21.1-linux-x86_64.sh -O /tmp/install-cmake.sh sudo bash /tmp/install-cmake.sh --prefix=/usr --exclude-subdir --skip-license rm /tmp/install-cmake.sh EOF # ===----------------------------------------------------------------------===## -# Android Buildkite Image +# Android Builder Base Image # ===----------------------------------------------------------------------===## FROM ubuntu:jammy AS android-builder-base @@ -170,10 +175,11 @@ ARG ANDROID_CLANG_VERSION ARG ANDROID_CLANG_PREBUILTS_COMMIT ARG ANDROID_SYSROOT_BID -RUN apt-get update && apt-get install -y curl unzip git +RUN apt-get update && apt-get install -y curl bzip2 git unzip # Install the Android platform tools (e.g. adb) into /opt/android/sdk. 
RUN <<EOF + set -e mkdir -p /opt/android/sdk cd /opt/android/sdk curl -LO https://dl.google.com/android/repository/platform-tools-latest-linux.zip @@ -187,6 +193,7 @@ EOF ENV ANDROID_CLANG_VERSION=$ANDROID_CLANG_VERSION ENV ANDROID_CLANG_PREBUILTS_COMMIT=$ANDROID_CLANG_PREBUILTS_COMMIT RUN <<EOF + set -e git clone --filter=blob:none --sparse \ https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86 \ /opt/android/clang @@ -206,6 +213,7 @@ EOF ENV ANDROID_SYSROOT_BID=$ANDROID_SYSROOT_BID RUN <<EOF + set -e cd /opt/android curl -L -o ndk_platform.tar.bz2 \ https://androidbuildinternal.googleapis.com/android/internal/build/v3/builds/${ANDROID_SYSROOT_BID}/ndk/attempts/latest/artifacts/ndk_platform.tar.bz2/url @@ -213,19 +221,6 @@ RUN <<EOF rm ndk_platform.tar.bz2 EOF -# Install Docker -RUN <<EOF - curl -fsSL https://get.docker.com -o /tmp/get-docker.sh - sh /tmp/get-docker.sh - rm /tmp/get-docker.sh - - # Install Docker. Mark the binary setuid so it can be run without prefixing it - # with sudo. Adding the container user to the docker group doesn't work because - # /var/run/docker.sock is owned by the host's docker GID, not the container's - # docker GID. - chmod u+s /usr/bin/docker -EOF - # ===----------------------------------------------------------------------===## # Buildkite Builder Image # ===----------------------------------------------------------------------===## @@ -243,6 +238,7 @@ WORKDIR /home/libcxx-builder # Install the Buildkite agent and dependencies. This must be done as non-root # for the Buildkite agent to be installed in a path where we can find it. 
RUN <<EOF + set -e cd /home/libcxx-builder curl -sL https://raw.githubusercontent.com/buildkite/agent/main/install.sh -o /tmp/install-agent.sh bash /tmp/install-agent.sh @@ -271,6 +267,22 @@ COPY ./vendor/android/container-setup.sh /opt/android/container-setup.sh ENV PATH="/opt/android/sdk/platform-tools:${PATH}" +USER root + +# Install Docker +RUN <<EOF + set -e + curl -fsSL https://get.docker.com -o /tmp/get-docker.sh + sh /tmp/get-docker.sh + rm /tmp/get-docker.sh + + # Install Docker. Mark the binary setuid so it can be run without prefixing it + # with sudo. Adding the container user to the docker group doesn't work because + # /var/run/docker.sock is owned by the host's docker GID, not the container's + # docker GID. + chmod u+s /usr/bin/docker +EOF + USER libcxx-builder WORKDIR /home/libcxx-builder diff --git a/libcxx/utils/ci/vendor/android/run-buildbot-container b/libcxx/utils/ci/vendor/android/run-buildbot-container index 4ab8319..7b5d9a4 100755 --- a/libcxx/utils/ci/vendor/android/run-buildbot-container +++ b/libcxx/utils/ci/vendor/android/run-buildbot-container @@ -27,5 +27,5 @@ if [ -S /var/run/docker.sock ]; then DOCKER_OPTIONS+=(--volume /var/run/docker.sock:/var/run/docker.sock) fi -docker run "${DOCKER_OPTIONS[@]}" libcxx-builder-android \ +docker run "${DOCKER_OPTIONS[@]}" ghcr.io/libcxx/android-buildkite-builder \ bash -c 'git config --global --add safe.directory /llvm; (/opt/android/container-setup.sh && exec bash)' diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 0c67206..6c04c92 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -76,6 +76,7 @@ class Context; class Function; class Instruction; class SelectInst; +class BranchInst; class LoadInst; class ReturnInst; class StoreInst; @@ -179,6 +180,7 @@ protected: friend class User; // For getting `Val`. friend class Use; // For getting `Val`. friend class SelectInst; // For getting `Val`. 
+ friend class BranchInst; // For getting `Val`. friend class LoadInst; // For getting `Val`. friend class StoreInst; // For getting `Val`. friend class ReturnInst; // For getting `Val`. @@ -343,6 +345,14 @@ protected: virtual unsigned getUseOperandNo(const Use &Use) const = 0; friend unsigned Use::getOperandNo() const; // For getUseOperandNo() + void swapOperandsInternal(unsigned OpIdxA, unsigned OpIdxB) { + assert(OpIdxA < getNumOperands() && "OpIdxA out of bounds!"); + assert(OpIdxB < getNumOperands() && "OpIdxB out of bounds!"); + auto UseA = getOperandUse(OpIdxA); + auto UseB = getOperandUse(OpIdxB); + UseA.swap(UseB); + } + #ifndef NDEBUG void verifyUserOfLLVMUse(const llvm::Use &Use) const; #endif // NDEBUG @@ -504,6 +514,7 @@ protected: /// returns its topmost LLVM IR instruction. llvm::Instruction *getTopmostLLVMInstruction() const; friend class SelectInst; // For getTopmostLLVMInstruction(). + friend class BranchInst; // For getTopmostLLVMInstruction(). friend class LoadInst; // For getTopmostLLVMInstruction(). friend class StoreInst; // For getTopmostLLVMInstruction(). friend class ReturnInst; // For getTopmostLLVMInstruction(). @@ -617,6 +628,100 @@ public: #endif }; +class BranchInst : public Instruction { + /// Use Context::createBranchInst(). Don't call the constructor directly. 
+ BranchInst(llvm::BranchInst *BI, Context &Ctx) + : Instruction(ClassID::Br, Opcode::Br, BI, Ctx) {} + friend Context; // for BranchInst() + Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { + return getOperandUseDefault(OpIdx, Verify); + } + SmallVector<llvm::Instruction *, 1> getLLVMInstrs() const final { + return {cast<llvm::Instruction>(Val)}; + } + +public: + unsigned getUseOperandNo(const Use &Use) const final { + return getUseOperandNoDefault(Use); + } + unsigned getNumOfIRInstrs() const final { return 1u; } + static BranchInst *create(BasicBlock *IfTrue, Instruction *InsertBefore, + Context &Ctx); + static BranchInst *create(BasicBlock *IfTrue, BasicBlock *InsertAtEnd, + Context &Ctx); + static BranchInst *create(BasicBlock *IfTrue, BasicBlock *IfFalse, + Value *Cond, Instruction *InsertBefore, + Context &Ctx); + static BranchInst *create(BasicBlock *IfTrue, BasicBlock *IfFalse, + Value *Cond, BasicBlock *InsertAtEnd, Context &Ctx); + /// For isa/dyn_cast. + static bool classof(const Value *From); + bool isUnconditional() const { + return cast<llvm::BranchInst>(Val)->isUnconditional(); + } + bool isConditional() const { + return cast<llvm::BranchInst>(Val)->isConditional(); + } + Value *getCondition() const; + void setCondition(Value *V) { setOperand(0, V); } + unsigned getNumSuccessors() const { return 1 + isConditional(); } + BasicBlock *getSuccessor(unsigned SuccIdx) const; + void setSuccessor(unsigned Idx, BasicBlock *NewSucc); + void swapSuccessors() { swapOperandsInternal(1, 2); } + +private: + struct LLVMBBToSBBB { + Context &Ctx; + LLVMBBToSBBB(Context &Ctx) : Ctx(Ctx) {} + BasicBlock *operator()(llvm::BasicBlock *BB) const; + }; + + struct ConstLLVMBBToSBBB { + Context &Ctx; + ConstLLVMBBToSBBB(Context &Ctx) : Ctx(Ctx) {} + const BasicBlock *operator()(const llvm::BasicBlock *BB) const; + }; + +public: + using sb_succ_op_iterator = + mapped_iterator<llvm::BranchInst::succ_op_iterator, LLVMBBToSBBB>; + 
iterator_range<sb_succ_op_iterator> successors() { + iterator_range<llvm::BranchInst::succ_op_iterator> LLVMRange = + cast<llvm::BranchInst>(Val)->successors(); + LLVMBBToSBBB BBMap(Ctx); + sb_succ_op_iterator MappedBegin = map_iterator(LLVMRange.begin(), BBMap); + sb_succ_op_iterator MappedEnd = map_iterator(LLVMRange.end(), BBMap); + return make_range(MappedBegin, MappedEnd); + } + + using const_sb_succ_op_iterator = + mapped_iterator<llvm::BranchInst::const_succ_op_iterator, + ConstLLVMBBToSBBB>; + iterator_range<const_sb_succ_op_iterator> successors() const { + iterator_range<llvm::BranchInst::const_succ_op_iterator> ConstLLVMRange = + static_cast<const llvm::BranchInst *>(cast<llvm::BranchInst>(Val)) + ->successors(); + ConstLLVMBBToSBBB ConstBBMap(Ctx); + const_sb_succ_op_iterator ConstMappedBegin = + map_iterator(ConstLLVMRange.begin(), ConstBBMap); + const_sb_succ_op_iterator ConstMappedEnd = + map_iterator(ConstLLVMRange.end(), ConstBBMap); + return make_range(ConstMappedBegin, ConstMappedEnd); + } + +#ifndef NDEBUG + void verify() const final { + assert(isa<llvm::BranchInst>(Val) && "Expected BranchInst!"); + } + friend raw_ostream &operator<<(raw_ostream &OS, const BranchInst &BI) { + BI.dump(OS); + return OS; + } + void dump(raw_ostream &OS) const override; + LLVM_DUMP_METHOD void dump() const override; +#endif +}; + class LoadInst final : public Instruction { /// Use LoadInst::create() instead of calling the constructor. 
LoadInst(llvm::LoadInst *LI, Context &Ctx) @@ -870,6 +975,8 @@ protected: SelectInst *createSelectInst(llvm::SelectInst *SI); friend SelectInst; // For createSelectInst() + BranchInst *createBranchInst(llvm::BranchInst *I); + friend BranchInst; // For createBranchInst() LoadInst *createLoadInst(llvm::LoadInst *LI); friend LoadInst; // For createLoadInst() StoreInst *createStoreInst(llvm::StoreInst *SI); diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def index efa9155..f3d6167 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def +++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def @@ -26,6 +26,7 @@ DEF_USER(Constant, Constant) // ClassID, Opcode(s), Class DEF_INSTR(Opaque, OP(Opaque), OpaqueInst) DEF_INSTR(Select, OP(Select), SelectInst) +DEF_INSTR(Br, OP(Br), BranchInst) DEF_INSTR(Load, OP(Load), LoadInst) DEF_INSTR(Store, OP(Store), StoreInst) DEF_INSTR(Ret, OP(Ret), ReturnInst) diff --git a/llvm/include/llvm/SandboxIR/Tracker.h b/llvm/include/llvm/SandboxIR/Tracker.h index b88eb3d..3daec3f 100644 --- a/llvm/include/llvm/SandboxIR/Tracker.h +++ b/llvm/include/llvm/SandboxIR/Tracker.h @@ -101,6 +101,27 @@ public: #endif }; +/// Tracks swapping a Use with another Use. +class UseSwap : public IRChangeBase { + Use ThisUse; + Use OtherUse; + +public: + UseSwap(const Use &ThisUse, const Use &OtherUse, Tracker &Tracker) + : IRChangeBase(Tracker), ThisUse(ThisUse), OtherUse(OtherUse) { + assert(ThisUse.getUser() == OtherUse.getUser() && "Expected same user!"); + } + void revert() final { ThisUse.swap(OtherUse); } + void accept() final {} +#ifndef NDEBUG + void dump(raw_ostream &OS) const final { + dumpCommon(OS); + OS << "UseSwap"; + } + LLVM_DUMP_METHOD void dump() const final; +#endif +}; + class EraseFromParent : public IRChangeBase { /// Contains all the data we need to restore an "erased" (i.e., detached) /// instruction: the instruction itself and its operands in order. 
diff --git a/llvm/include/llvm/SandboxIR/Use.h b/llvm/include/llvm/SandboxIR/Use.h index d77b456..03cbfe6 100644 --- a/llvm/include/llvm/SandboxIR/Use.h +++ b/llvm/include/llvm/SandboxIR/Use.h @@ -47,6 +47,7 @@ public: void set(Value *V); class User *getUser() const { return Usr; } unsigned getOperandNo() const; + void swap(Use &OtherUse); Context *getContext() const { return Ctx; } bool operator==(const Use &Other) const { assert(Ctx == Other.Ctx && "Contexts differ!"); diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 51c9af8..ceadb34 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -20,6 +20,13 @@ void Use::set(Value *V) { LLVMUse->set(V->Val); } unsigned Use::getOperandNo() const { return Usr->getUseOperandNo(*this); } +void Use::swap(Use &OtherUse) { + auto &Tracker = Ctx->getTracker(); + if (Tracker.isTracking()) + Tracker.track(std::make_unique<UseSwap>(*this, OtherUse, Tracker)); + LLVMUse->swap(*OtherUse.LLVMUse); +} + #ifndef NDEBUG void Use::dump(raw_ostream &OS) const { Value *Def = nullptr; @@ -500,6 +507,85 @@ void SelectInst::dump() const { } #endif // NDEBUG +BranchInst *BranchInst::create(BasicBlock *IfTrue, Instruction *InsertBefore, + Context &Ctx) { + auto &Builder = Ctx.getLLVMIRBuilder(); + Builder.SetInsertPoint(cast<llvm::Instruction>(InsertBefore->Val)); + llvm::BranchInst *NewBr = + Builder.CreateBr(cast<llvm::BasicBlock>(IfTrue->Val)); + return Ctx.createBranchInst(NewBr); +} + +BranchInst *BranchInst::create(BasicBlock *IfTrue, BasicBlock *InsertAtEnd, + Context &Ctx) { + auto &Builder = Ctx.getLLVMIRBuilder(); + Builder.SetInsertPoint(cast<llvm::BasicBlock>(InsertAtEnd->Val)); + llvm::BranchInst *NewBr = + Builder.CreateBr(cast<llvm::BasicBlock>(IfTrue->Val)); + return Ctx.createBranchInst(NewBr); +} + +BranchInst *BranchInst::create(BasicBlock *IfTrue, BasicBlock *IfFalse, + Value *Cond, Instruction *InsertBefore, + Context &Ctx) { + auto &Builder = 
Ctx.getLLVMIRBuilder(); + Builder.SetInsertPoint(cast<llvm::Instruction>(InsertBefore->Val)); + llvm::BranchInst *NewBr = + Builder.CreateCondBr(Cond->Val, cast<llvm::BasicBlock>(IfTrue->Val), + cast<llvm::BasicBlock>(IfFalse->Val)); + return Ctx.createBranchInst(NewBr); +} + +BranchInst *BranchInst::create(BasicBlock *IfTrue, BasicBlock *IfFalse, + Value *Cond, BasicBlock *InsertAtEnd, + Context &Ctx) { + auto &Builder = Ctx.getLLVMIRBuilder(); + Builder.SetInsertPoint(cast<llvm::BasicBlock>(InsertAtEnd->Val)); + llvm::BranchInst *NewBr = + Builder.CreateCondBr(Cond->Val, cast<llvm::BasicBlock>(IfTrue->Val), + cast<llvm::BasicBlock>(IfFalse->Val)); + return Ctx.createBranchInst(NewBr); +} + +bool BranchInst::classof(const Value *From) { + return From->getSubclassID() == ClassID::Br; +} + +Value *BranchInst::getCondition() const { + assert(isConditional() && "Cannot get condition of an uncond branch!"); + return Ctx.getValue(cast<llvm::BranchInst>(Val)->getCondition()); +} + +BasicBlock *BranchInst::getSuccessor(unsigned SuccIdx) const { + assert(SuccIdx < getNumSuccessors() && + "Successor # out of range for Branch!"); + return cast_or_null<BasicBlock>( + Ctx.getValue(cast<llvm::BranchInst>(Val)->getSuccessor(SuccIdx))); +} + +void BranchInst::setSuccessor(unsigned Idx, BasicBlock *NewSucc) { + assert((Idx == 0 || Idx == 1) && "Out of bounds!"); + setOperand(2u - Idx, NewSucc); +} + +BasicBlock *BranchInst::LLVMBBToSBBB::operator()(llvm::BasicBlock *BB) const { + return cast<BasicBlock>(Ctx.getValue(BB)); +} +const BasicBlock * +BranchInst::ConstLLVMBBToSBBB::operator()(const llvm::BasicBlock *BB) const { + return cast<BasicBlock>(Ctx.getValue(BB)); +} +#ifndef NDEBUG +void BranchInst::dump(raw_ostream &OS) const { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); +} +void BranchInst::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + LoadInst *LoadInst::create(Type *Ty, Value *Ptr, MaybeAlign Align, Instruction *InsertBefore, Context &Ctx, 
const Twine &Name) { @@ -758,6 +844,11 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { It->second = std::unique_ptr<SelectInst>(new SelectInst(LLVMSel, *this)); return It->second.get(); } + case llvm::Instruction::Br: { + auto *LLVMBr = cast<llvm::BranchInst>(LLVMV); + It->second = std::unique_ptr<BranchInst>(new BranchInst(LLVMBr, *this)); + return It->second.get(); + } case llvm::Instruction::Load: { auto *LLVMLd = cast<llvm::LoadInst>(LLVMV); It->second = std::unique_ptr<LoadInst>(new LoadInst(LLVMLd, *this)); @@ -796,6 +887,11 @@ SelectInst *Context::createSelectInst(llvm::SelectInst *SI) { return cast<SelectInst>(registerValue(std::move(NewPtr))); } +BranchInst *Context::createBranchInst(llvm::BranchInst *BI) { + auto NewPtr = std::unique_ptr<BranchInst>(new BranchInst(BI, *this)); + return cast<BranchInst>(registerValue(std::move(NewPtr))); +} + LoadInst *Context::createLoadInst(llvm::LoadInst *LI) { auto NewPtr = std::unique_ptr<LoadInst>(new LoadInst(LI, *this)); return cast<LoadInst>(registerValue(std::move(NewPtr))); diff --git a/llvm/lib/SandboxIR/Tracker.cpp b/llvm/lib/SandboxIR/Tracker.cpp index 626c9c2..c741776 100644 --- a/llvm/lib/SandboxIR/Tracker.cpp +++ b/llvm/lib/SandboxIR/Tracker.cpp @@ -35,6 +35,11 @@ void UseSet::dump() const { dump(dbgs()); dbgs() << "\n"; } + +void UseSwap::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} #endif // NDEBUG Tracker::~Tracker() { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 3d37eb2..bb36ce7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -78,7 +78,7 @@ def HasSignExt : def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, - AssemblerPredicate<(all_of FeatureSIMD128), "simd128">; + AssemblerPredicate<(any_of FeatureSIMD128, FeatureRelaxedSIMD), "simd128">; def HasTailCall : Predicate<"Subtarget->hasTailCall()">, diff 
--git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 66bd786..64da3dfd 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -242,9 +242,16 @@ public: // recursion. bool Recursive = false; - // The corresponding allocation or interior call. + // The corresponding allocation or interior call. This is the primary call + // for which we have created this node. CallInfo Call; + // List of other calls that can be treated the same as the primary call + // through cloning. I.e. located in the same function and have the same + // (possibly pruned) stack ids. They will be updated the same way as the + // primary call when assigning to function clones. + std::vector<CallInfo> MatchingCalls; + // For alloc nodes this is a unique id assigned when constructed, and for // callsite stack nodes it is the original stack id when the node is // constructed from the memprof MIB metadata on the alloc nodes. Note that @@ -457,6 +464,9 @@ protected: /// iteration. MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata; + /// Records the function each call is located in. + DenseMap<CallInfo, const FuncTy *> CallToFunc; + /// Map from callsite node to the enclosing caller function. std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; @@ -474,7 +484,8 @@ private: /// StackIdToMatchingCalls map. 
void assignStackNodesPostOrder( ContextNode *Node, DenseSet<const ContextNode *> &Visited, - DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); + DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls, + DenseMap<CallInfo, CallInfo> &CallToMatchingCall); /// Duplicates the given set of context ids, updating the provided /// map from each original id with the newly generated context ids, @@ -1230,10 +1241,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node, template <typename DerivedCCG, typename FuncTy, typename CallTy> void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: - assignStackNodesPostOrder(ContextNode *Node, - DenseSet<const ContextNode *> &Visited, - DenseMap<uint64_t, std::vector<CallContextInfo>> - &StackIdToMatchingCalls) { + assignStackNodesPostOrder( + ContextNode *Node, DenseSet<const ContextNode *> &Visited, + DenseMap<uint64_t, std::vector<CallContextInfo>> + &StackIdToMatchingCalls, + DenseMap<CallInfo, CallInfo> &CallToMatchingCall) { auto Inserted = Visited.insert(Node); if (!Inserted.second) return; @@ -1246,7 +1258,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: // Skip any that have been removed during the recursion. if (!Edge) continue; - assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); + assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls, + CallToMatchingCall); } // If this node's stack id is in the map, update the graph to contain new @@ -1289,8 +1302,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; // Skip any for which we didn't assign any ids, these don't get a node in // the graph. - if (SavedContextIds.empty()) + if (SavedContextIds.empty()) { + // If this call has a matching call (located in the same function and + // having the same stack ids), simply add it to the context node created + // for its matching call earlier. 
These can be treated the same through + // cloning and get updated at the same time. + if (!CallToMatchingCall.contains(Call)) + continue; + auto MatchingCall = CallToMatchingCall[Call]; + assert(NonAllocationCallToContextNodeMap.contains(MatchingCall)); + NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back( + Call); continue; + } assert(LastId == Ids.back()); @@ -1422,6 +1446,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { // there is more than one call with the same stack ids. Their (possibly newly // duplicated) context ids are saved in the StackIdToMatchingCalls map. DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; + // Save a map from each call to any that are found to match it. I.e. located + // in the same function and have the same (possibly pruned) stack ids. We use + // this to avoid creating extra graph nodes as they can be treated the same. + DenseMap<CallInfo, CallInfo> CallToMatchingCall; for (auto &It : StackIdToMatchingCalls) { auto &Calls = It.getSecond(); // Skip single calls with a single stack id. These don't need a new node. @@ -1460,6 +1488,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds(); assert(!LastNodeContextIds.empty()); + // Map from function to the first call from the below list (with matching + // stack ids) found in that function. Note that calls from different + // functions can have the same stack ids because this is the list of stack + // ids that had (possibly pruned) nodes after building the graph from the + // allocation MIBs. 
+ DenseMap<const FuncTy *, CallInfo> FuncToCallMap; + for (unsigned I = 0; I < Calls.size(); I++) { auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; assert(SavedContextIds.empty()); @@ -1533,6 +1568,18 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { continue; } + const FuncTy *CallFunc = CallToFunc[Call]; + + // If the prior call had the same stack ids this map would not be empty. + // Check if we already have a call that "matches" because it is located + // in the same function. + if (FuncToCallMap.contains(CallFunc)) { + // Record the matching call found for this call, and skip it. We + // will subsequently combine it into the same node. + CallToMatchingCall[Call] = FuncToCallMap[CallFunc]; + continue; + } + // Check if the next set of stack ids is the same (since the Calls vector // of tuples is sorted by the stack ids we can just look at the next one). bool DuplicateContextIds = false; @@ -1562,7 +1609,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { set_subtract(LastNodeContextIds, StackSequenceContextIds); if (LastNodeContextIds.empty()) break; - } + // No longer possibly in a sequence of calls with duplicate stack ids, + // clear the map. + FuncToCallMap.clear(); + } else + // Record the call with its function, so we can locate it the next time + // we find a call from this function when processing the calls with the + // same stack ids. + FuncToCallMap[CallFunc] = Call; } } @@ -1579,7 +1633,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { // associated context ids over to the new nodes. 
DenseSet<const ContextNode *> Visited; for (auto &Entry : AllocationCallToContextNodeMap) - assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); + assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls, + CallToMatchingCall); if (VerifyCCG) check(); } @@ -1679,6 +1734,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( continue; if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { CallsWithMetadata.push_back(&I); + CallToFunc[&I] = &F; auto *AllocNode = addAllocNode(&I, &F); auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); assert(CallsiteMD); @@ -1700,8 +1756,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( I.setMetadata(LLVMContext::MD_callsite, nullptr); } // For callsite metadata, add to list for this function for later use. - else if (I.getMetadata(LLVMContext::MD_callsite)) + else if (I.getMetadata(LLVMContext::MD_callsite)) { CallsWithMetadata.push_back(&I); + CallToFunc[&I] = &F; + } } } if (!CallsWithMetadata.empty()) @@ -1756,8 +1814,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( // correlate properly in applyImport in the backends. if (AN.MIBs.empty()) continue; - CallsWithMetadata.push_back({&AN}); - auto *AllocNode = addAllocNode({&AN}, FS); + IndexCall AllocCall(&AN); + CallsWithMetadata.push_back(AllocCall); + CallToFunc[AllocCall] = FS; + auto *AllocNode = addAllocNode(AllocCall, FS); // Pass an empty CallStack to the CallsiteContext (second) // parameter, since for ThinLTO we already collapsed out the inlined // stack ids on the allocation call during ModuleSummaryAnalysis. @@ -1788,8 +1848,11 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( } // For callsite metadata, add to list for this function for later use. 
if (!FS->callsites().empty()) - for (auto &SN : FS->mutableCallsites()) - CallsWithMetadata.push_back({&SN}); + for (auto &SN : FS->mutableCallsites()) { + IndexCall StackNodeCall(&SN); + CallsWithMetadata.push_back(StackNodeCall); + CallToFunc[StackNodeCall] = FS; + } if (!CallsWithMetadata.empty()) FuncToCallsWithMetadata[FS] = CallsWithMetadata; @@ -2225,6 +2288,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print( if (Recursive) OS << " (recursive)"; OS << "\n"; + if (!MatchingCalls.empty()) { + OS << "\tMatchingCalls:\n"; + for (auto &MatchingCall : MatchingCalls) { + OS << "\t"; + MatchingCall.print(OS); + OS << "\n"; + } + } OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; OS << "\tContextIds:"; // Make a copy of the computed context ids that we can sort for stability. @@ -2478,6 +2549,7 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone( std::make_unique<ContextNode>(Node->IsAllocation, Node->Call)); ContextNode *Clone = NodeOwner.back().get(); Node->addClone(Clone); + Clone->MatchingCalls = Node->MatchingCalls; assert(NodeToCallingFunc.count(Node)); NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true, @@ -3021,6 +3093,14 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { if (CallMap.count(Call)) CallClone = CallMap[Call]; CallsiteClone->setCall(CallClone); + // Need to do the same for all matching calls. + for (auto &MatchingCall : Node->MatchingCalls) { + CallInfo CallClone(MatchingCall); + if (CallMap.count(MatchingCall)) + CallClone = CallMap[MatchingCall]; + // Updates the call in the list. 
+ MatchingCall = CallClone; + } }; // Keep track of the clones of callsite Node that need to be assigned to @@ -3187,6 +3267,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { CallInfo NewCall(CallMap[OrigCall]); assert(NewCall); NewClone->setCall(NewCall); + // Need to do the same for all matching calls. + for (auto &MatchingCall : NewClone->MatchingCalls) { + CallInfo OrigMatchingCall(MatchingCall); + OrigMatchingCall.setCloneNo(0); + assert(CallMap.count(OrigMatchingCall)); + CallInfo NewCall(CallMap[OrigMatchingCall]); + assert(NewCall); + // Updates the call in the list. + MatchingCall = NewCall; + } } } // Fall through to handling below to perform the recording of the @@ -3373,6 +3463,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { if (Node->IsAllocation) { updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); + assert(Node->MatchingCalls.empty()); return; } @@ -3381,6 +3472,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; updateCall(Node->Call, CalleeFunc); + // Update all the matching calls as well. 
+ for (auto &Call : Node->MatchingCalls) + updateCall(Call, CalleeFunc); }; // Performs DFS traversal starting from allocation nodes to update calls to diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1b787d0..2d6d67a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -283,12 +283,12 @@ static Instruction *getInstructionForCost(const VPRecipeBase *R) { } InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) { - if (auto *UI = getInstructionForCost(this)) - if (Ctx.skipCostComputation(UI, VF.isVector())) - return 0; + auto *UI = getInstructionForCost(this); + if (UI && Ctx.skipCostComputation(UI, VF.isVector())) + return 0; InstructionCost RecipeCost = computeCost(VF, Ctx); - if (ForceTargetInstructionCost.getNumOccurrences() > 0 && + if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 && RecipeCost.isValid()) RecipeCost = InstructionCost(ForceTargetInstructionCost); diff --git a/llvm/test/CodeGen/WebAssembly/simd-asm-pred.ll b/llvm/test/CodeGen/WebAssembly/simd-asm-pred.ll new file mode 100644 index 0000000..f022c3e --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-asm-pred.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -verify-machineinstrs -mattr=+relaxed-simd | FileCheck %s + +; Test that setting "relaxed-simd" target feature set also implies 'simd128' in +; AssemblerPredicate, which is used to verify instructions in AsmPrinter. + +target triple = "wasm32-unknown-unknown" + +declare <2 x i64> @llvm.wasm.relaxed.laneselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) + +; The compiled result of this function uses LOCAL_GET_V128, which is predicated +; on the 'simd128' feature. We should be able to compile this when only +; 'relaxed-simd' is set, which implies 'simd128'. 
+define <2 x i64> @test(<2 x i64>, <2 x i64>, <2 x i64>) #0 { +; CHECK-LABEL: test: +; CHECK: .functype test (v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64x2.relaxed_laneselect +start: + %_4 = tail call <2 x i64> @llvm.wasm.relaxed.laneselect.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) #3 + ret <2 x i64> %_4 +} diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_float.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_float.ll new file mode 100644 index 0000000..54b5705 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_float.ll @@ -0,0 +1,307 @@ +; No assertions yet because the test case crashes MSan +; +; Test memory sanitizer instrumentation for Arm NEON VST_{2,3,4} and +; VST_1x{2,3,4} instructions, including floating-point parameters. +; +; RUN: opt < %s -passes=msan -S | FileCheck %s +; +; UNSUPPORTED: {{.*}} +; +; Generated with: +; grep call clang/test/CodeGen/aarch64-neon-intrinsics.c \ +; | grep 'neon[.]st' \ +; | sed -r 's/^\/\/ CHECK:[ ]*//' \ +; | cut -d ' ' -f 1 --complement \ +; | sed -r 's/[[][[]TMP[0-9]+[]][]]/%A/' \ +; | sed -r 's/[[][[]TMP[0-9]+[]][]]/%B/' \ +; | sed -r 's/[[][[]TMP[0-9]+[]][]]/%C/' \ +; | sed -r 's/[[][[]TMP[0-9]+[]][]]/%D/' \ +; | sort \ +; | uniq \ +; | while read x; \ +; do \ +; y=`echo "$x" \ +; | sed -r 's/@llvm[.]aarch64[.]neon[.]/@/' \ +; | sed -r 's/[.]p0//' \ +; | tr '.' 
'_'`; \ +; echo "define $y sanitize_memory {"; \ +; echo " call $x"; \ +; echo " ret void"; \ +; echo "}"; \ +; echo; \ +; done + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +; ----------------------------------------------------------------------------------------------------------------------------------------------- + +define void @st1x2_v1f64(<1 x double> %A, <1 x double> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %A, <1 x double> %B, ptr %a) + ret void +} + +define void @st1x2_v1i64(<1 x i64> %A, <1 x i64> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %A, <1 x i64> %B, ptr %a) + ret void +} + +define void @st1x2_v2f64(<2 x double> %A, <2 x double> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %A, <2 x double> %B, ptr %a) + ret void +} + +define void @st1x2_v2i64(<2 x i64> %A, <2 x i64> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %A, <2 x i64> %B, ptr %a) + ret void +} + +define void @st1x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %a) + ret void +} + +define void @st1x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %a) + ret void +} + +define void @st1x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %a) + ret void +} + +define void @st1x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %a) + ret void +} + 
+define void @st1x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %a) + ret void +} + +define void @st1x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %a) + ret void +} + +define void @st1x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %a) + ret void +} + +define void @st1x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %a) + ret void +} + +define void @st2_v16i8(<16 x i8> %A, <16 x i8> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %A, <16 x i8> %B, ptr %a) + ret void +} + +define void @st2_v1f64(<1 x double> %A, <1 x double> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %A, <1 x double> %B, ptr %a) + ret void +} + +define void @st2_v1i64(<1 x i64> %A, <1 x i64> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %A, <1 x i64> %B, ptr %a) + ret void +} + +define void @st2_v2f32(<2 x float> %A, <2 x float> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %A, <2 x float> %B, ptr %a) + ret void +} + +define void @st2_v2f64(<2 x double> %A, <2 x double> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %A, <2 x double> %B, ptr %a) + ret void +} + +define void @st2_v2i32(<2 x i32> %A, <2 x i32> %B, ptr %a) sanitize_memory { + call void 
@llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %A, <2 x i32> %B, ptr %a) + ret void +} + +define void @st2_v2i64(<2 x i64> %A, <2 x i64> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %A, <2 x i64> %B, ptr %a) + ret void +} + +define void @st2_v4f16(<4 x half> %A, <4 x half> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v4f16.p0(<4 x half> %A, <4 x half> %B, ptr %a) + ret void +} + +define void @st2_v4f32(<4 x float> %A, <4 x float> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %A, <4 x float> %B, ptr %a) + ret void +} + +define void @st2_v4i16(<4 x i16> %A, <4 x i16> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %A, <4 x i16> %B, ptr %a) + ret void +} + +define void @st2_v4i32(<4 x i32> %A, <4 x i32> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %A, <4 x i32> %B, ptr %a) + ret void +} + +define void @st2_v8f16(<8 x half> %A, <8 x half> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v8f16.p0(<8 x half> %A, <8 x half> %B, ptr %a) + ret void +} + +define void @st2_v8i16(<8 x i16> %A, <8 x i16> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %A, <8 x i16> %B, ptr %a) + ret void +} + +define void @st2_v8i8(<8 x i8> %A, <8 x i8> %B, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %A, <8 x i8> %B, ptr %a) + ret void +} + +define void @st3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %a) + ret void +} + +define void @st3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %a) + ret void +} + +define void @st3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %a) sanitize_memory { 
+ call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %a) + ret void +} + +define void @st3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %A, <2 x float> %B, <2 x float> %C, ptr %a) + ret void +} + +define void @st3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %a) + ret void +} + +define void @st3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %a) + ret void +} + +define void @st3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %a) + ret void +} + +define void @st3_v4f16(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v4f16.p0(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr %a) + ret void +} + +define void @st3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %A, <4 x float> %B, <4 x float> %C, ptr %a) + ret void +} + +define void @st3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %a) + ret void +} + +define void @st3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %a) + ret void +} + +define void @st3_v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v8f16.p0(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr %a) + ret void +} + +define void 
@st3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %a) + ret void +} + +define void @st3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %a) + ret void +} + +define void @st4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %a) + ret void +} + +define void @st4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %a) + ret void +} + +define void @st4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %a) + ret void +} + +define void @st4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %a) + ret void +} + +define void @st4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %a) + ret void +} + +define void @st4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %a) + ret void +} + +define void @st4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %A, <2 x 
i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %a) + ret void +} + +define void @st4_v4f16(<4 x half> %A, <4 x half> %B, <4 x half> %C, <4 x half> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v4f16.p0(<4 x half> %A, <4 x half> %B, <4 x half> %C, <4 x half> %D, ptr %a) + ret void +} + +define void @st4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %a) + ret void +} + +define void @st4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %a) + ret void +} + +define void @st4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %a) + ret void +} + +define void @st4_v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %C, <8 x half> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v8f16.p0(<8 x half> %A, <8 x half> %B, <8 x half> %C, <8 x half> %D, ptr %a) + ret void +} + +define void @st4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %a) + ret void +} + +define void @st4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %a) sanitize_memory { + call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %a) + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll new file mode 100644 index 0000000..3477c8d --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll 
@@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -mcpu=z16 -force-target-instruction-cost=1 -S %s | FileCheck %s + +target triple = "systemz-unknown-linux-unknown" + +define void @test_scalar_steps_target_instruction_cost(ptr %dst) { +; CHECK-LABEL: define void @test_scalar_steps_target_instruction_cost( +; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1> +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], <i64 8, i64 8> +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; CHECK: [[PRED_STORE_IF1]]: +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
i64, ptr [[DST]], i64 [[TMP5]] +; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8 +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; CHECK: [[PRED_STORE_CONTINUE2]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10 +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 30, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV]], 22 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i64, ptr %dst, i64 %iv + store i64 %iv, ptr %gep, align 8 + %iv.next = add nuw nsw i64 %iv, 3 + %cmp = icmp ult i64 %iv, 22 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. 
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index ba90b4f..c600103 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -398,7 +398,7 @@ bb1: EXPECT_EQ(Buff, R"IR( void @foo(i32 %arg0, i32 %arg1) { bb0: - br label %bb1 ; SB3. (Opaque) + br label %bb1 ; SB3. (Br) bb1: ret void ; SB5. (Ret) @@ -466,7 +466,7 @@ bb1: BB0.dump(BS); EXPECT_EQ(Buff, R"IR( bb0: - br label %bb1 ; SB2. (Opaque) + br label %bb1 ; SB2. (Br) )IR"); } #endif // NDEBUG @@ -629,6 +629,111 @@ define void @foo(i1 %c0, i8 %v0, i8 %v1, i1 %c1) { } } +TEST_F(SandboxIRTest, BranchInst) { + parseIR(C, R"IR( +define void @foo(i1 %cond0, i1 %cond2) { + bb0: + br i1 %cond0, label %bb1, label %bb2 + bb1: + ret void + bb2: + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *Cond0 = F->getArg(0); + auto *Cond1 = F->getArg(1); + auto *BB0 = cast<sandboxir::BasicBlock>( + Ctx.getValue(getBasicBlockByName(*LLVMF, "bb0"))); + auto *BB1 = cast<sandboxir::BasicBlock>( + Ctx.getValue(getBasicBlockByName(*LLVMF, "bb1"))); + auto *Ret1 = BB1->getTerminator(); + auto *BB2 = cast<sandboxir::BasicBlock>( + Ctx.getValue(getBasicBlockByName(*LLVMF, "bb2"))); + auto *Ret2 = BB2->getTerminator(); + auto It = BB0->begin(); + auto *Br0 = cast<sandboxir::BranchInst>(&*It++); + // Check isUnconditional(). + EXPECT_FALSE(Br0->isUnconditional()); + // Check isConditional(). + EXPECT_TRUE(Br0->isConditional()); + // Check getCondition(). + EXPECT_EQ(Br0->getCondition(), Cond0); + // Check setCondition(). + Br0->setCondition(Cond1); + EXPECT_EQ(Br0->getCondition(), Cond1); + // Check getNumSuccessors(). + EXPECT_EQ(Br0->getNumSuccessors(), 2u); + // Check getSuccessor(). + EXPECT_EQ(Br0->getSuccessor(0), BB1); + EXPECT_EQ(Br0->getSuccessor(1), BB2); + // Check swapSuccessors(). 
+ Br0->swapSuccessors(); + EXPECT_EQ(Br0->getSuccessor(0), BB2); + EXPECT_EQ(Br0->getSuccessor(1), BB1); + // Check successors(). + EXPECT_EQ(range_size(Br0->successors()), 2u); + unsigned SuccIdx = 0; + SmallVector<sandboxir::BasicBlock *> ExpectedSuccs({BB1, BB2}); + for (sandboxir::BasicBlock *Succ : Br0->successors()) + EXPECT_EQ(Succ, ExpectedSuccs[SuccIdx++]); + + { + // Check unconditional BranchInst::create() InsertBefore. + auto *Br = sandboxir::BranchInst::create(BB1, /*InsertBefore=*/Ret1, Ctx); + EXPECT_FALSE(Br->isConditional()); + EXPECT_TRUE(Br->isUnconditional()); +#ifndef NDEBUG + EXPECT_DEATH(Br->getCondition(), ".*condition.*"); +#endif // NDEBUG + unsigned SuccIdx = 0; + SmallVector<sandboxir::BasicBlock *> ExpectedSuccs({BB1}); + for (sandboxir::BasicBlock *Succ : Br->successors()) + EXPECT_EQ(Succ, ExpectedSuccs[SuccIdx++]); + EXPECT_EQ(Br->getNextNode(), Ret1); + } + { + // Check unconditional BranchInst::create() InsertAtEnd. + auto *Br = sandboxir::BranchInst::create(BB1, /*InsertAtEnd=*/BB1, Ctx); + EXPECT_FALSE(Br->isConditional()); + EXPECT_TRUE(Br->isUnconditional()); +#ifndef NDEBUG + EXPECT_DEATH(Br->getCondition(), ".*condition.*"); +#endif // NDEBUG + unsigned SuccIdx = 0; + SmallVector<sandboxir::BasicBlock *> ExpectedSuccs({BB1}); + for (sandboxir::BasicBlock *Succ : Br->successors()) + EXPECT_EQ(Succ, ExpectedSuccs[SuccIdx++]); + EXPECT_EQ(Br->getPrevNode(), Ret1); + } + { + // Check conditional BranchInst::create() InsertBefore. + auto *Br = sandboxir::BranchInst::create(BB1, BB2, Cond0, + /*InsertBefore=*/Ret1, Ctx); + EXPECT_TRUE(Br->isConditional()); + EXPECT_EQ(Br->getCondition(), Cond0); + unsigned SuccIdx = 0; + SmallVector<sandboxir::BasicBlock *> ExpectedSuccs({BB2, BB1}); + for (sandboxir::BasicBlock *Succ : Br->successors()) + EXPECT_EQ(Succ, ExpectedSuccs[SuccIdx++]); + EXPECT_EQ(Br->getNextNode(), Ret1); + } + { + // Check conditional BranchInst::create() InsertAtEnd. 
+ auto *Br = sandboxir::BranchInst::create(BB1, BB2, Cond0, + /*InsertAtEnd=*/BB2, Ctx); + EXPECT_TRUE(Br->isConditional()); + EXPECT_EQ(Br->getCondition(), Cond0); + unsigned SuccIdx = 0; + SmallVector<sandboxir::BasicBlock *> ExpectedSuccs({BB2, BB1}); + for (sandboxir::BasicBlock *Succ : Br->successors()) + EXPECT_EQ(Succ, ExpectedSuccs[SuccIdx++]); + EXPECT_EQ(Br->getPrevNode(), Ret2); + } +} + TEST_F(SandboxIRTest, LoadInst) { parseIR(C, R"IR( define void @foo(ptr %arg0, ptr %arg1) { diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp index 354cd18..dd9dcd5 100644 --- a/llvm/unittests/SandboxIR/TrackerTest.cpp +++ b/llvm/unittests/SandboxIR/TrackerTest.cpp @@ -69,6 +69,49 @@ define void @foo(ptr %ptr) { EXPECT_EQ(Ld->getOperand(0), Gep0); } +TEST_F(TrackerTest, SwapOperands) { + parseIR(C, R"IR( +define void @foo(i1 %cond) { + bb0: + br i1 %cond, label %bb1, label %bb2 + bb1: + ret void + bb2: + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + Ctx.createFunction(&LLVMF); + auto *BB0 = cast<sandboxir::BasicBlock>( + Ctx.getValue(getBasicBlockByName(LLVMF, "bb0"))); + auto *BB1 = cast<sandboxir::BasicBlock>( + Ctx.getValue(getBasicBlockByName(LLVMF, "bb1"))); + auto *BB2 = cast<sandboxir::BasicBlock>( + Ctx.getValue(getBasicBlockByName(LLVMF, "bb2"))); + auto &Tracker = Ctx.getTracker(); + Tracker.save(); + auto It = BB0->begin(); + auto *Br = cast<sandboxir::BranchInst>(&*It++); + + unsigned SuccIdx = 0; + SmallVector<sandboxir::BasicBlock *> ExpectedSuccs({BB2, BB1}); + for (auto *Succ : Br->successors()) + EXPECT_EQ(Succ, ExpectedSuccs[SuccIdx++]); + + // This calls User::swapOperandsInternal() internally. 
+ Br->swapSuccessors(); + + SuccIdx = 0; + for (auto *Succ : reverse(Br->successors())) + EXPECT_EQ(Succ, ExpectedSuccs[SuccIdx++]); + + Ctx.getTracker().revert(); + SuccIdx = 0; + for (auto *Succ : Br->successors()) + EXPECT_EQ(Succ, ExpectedSuccs[SuccIdx++]); +} + TEST_F(TrackerTest, RUWIf_RAUW_RUOW) { parseIR(C, R"IR( define void @foo(ptr %ptr) { diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py index c5b208c..d3369ab 100644 --- a/llvm/utils/mlgo-utils/mlgo/__init__.py +++ b/llvm/utils/mlgo-utils/mlgo/__init__.py @@ -4,7 +4,7 @@ from datetime import timezone, datetime -__versioninfo__ = (19, 0, 0) +__versioninfo__ = (20, 0, 0) __version__ = ( ".".join(str(v) for v in __versioninfo__) + "dev" diff --git a/llvm/utils/release/bump-version.py b/llvm/utils/release/bump-version.py index b1799cb..5db62e8 100755 --- a/llvm/utils/release/bump-version.py +++ b/llvm/utils/release/bump-version.py @@ -188,6 +188,11 @@ if __name__ == "__main__": "llvm/utils/lit/lit/__init__.py", LitProcessor(args), ), + # mlgo-utils configuration + ( + "llvm/utils/mlgo-utils/mlgo/__init__.py", + LitProcessor(args), + ), # GN build system ( "llvm/utils/gn/secondary/llvm/version.gni", |