diff options
author | Amir Ayupov <aaupov@meta.com> | 2024-07-22 15:31:36 -0700 |
---|---|---|
committer | Amir Ayupov <aaupov@fb.com> | 2024-07-22 15:31:36 -0700 |
commit | 5cd086013b91df97f69b151c42d24f1d2a5de08b (patch) | |
tree | d2c5252067f5abb26cee735f23c98b97e27c79ca | |
parent | 56ffbd97fda16008d02180a460211829354f1094 (diff) | |
download | llvm-users/aaupov/spr/main.boltnfc-track-fragment-relationships-using-equivalenceclasses.zip llvm-users/aaupov/spr/main.boltnfc-track-fragment-relationships-using-equivalenceclasses.tar.gz llvm-users/aaupov/spr/main.boltnfc-track-fragment-relationships-using-equivalenceclasses.tar.bz2 |
[𝘀𝗽𝗿] changes to main this commit is based on users/aaupov/spr/main.boltnfc-track-fragment-relationships-using-equivalenceclasses
Created using spr 1.3.4
[skip ci]
283 files changed, 7057 insertions, 2306 deletions
diff --git a/a-abfdec1d.o.tmp b/a-abfdec1d.o.tmp deleted file mode 100644 index e69de29..0000000 --- a/a-abfdec1d.o.tmp +++ /dev/null diff --git a/bolt/docs/OptimizingClang.md b/bolt/docs/OptimizingClang.md index ff7e71b..685fcc2 100644 --- a/bolt/docs/OptimizingClang.md +++ b/bolt/docs/OptimizingClang.md @@ -49,6 +49,7 @@ $ cd ${TOPLEV}/stage3 $ CPATH=${TOPLEV}/stage2-prof-use-lto/install/bin/ $ cmake -G Ninja ${TOPLEV}/llvm -DLLVM_TARGETS_TO_BUILD=X86 -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=$CPATH/clang -DCMAKE_CXX_COMPILER=$CPATH/clang++ \ + -DLLVM_ENABLE_PROJECTS="clang" \ -DLLVM_USE_LINKER=lld -DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage3/install $ perf record -e cycles:u -j any,u -- ninja clang ``` diff --git a/bolt/docs/generate_doc.py b/bolt/docs/generate_doc.py index d8829da..763dc00 100644 --- a/bolt/docs/generate_doc.py +++ b/bolt/docs/generate_doc.py @@ -45,7 +45,7 @@ def parse_bolt_options(output): cleaned_line = line.strip() if cleaned_line.casefold() in map(str.casefold, section_headers): - if prev_section != None: # Save last option from prev section + if prev_section is not None: # Save last option from prev section add_info(sections, current_section, option, description) option, description = None, [] @@ -76,7 +76,7 @@ def parse_bolt_options(output): description = [descr] if option.startswith("--print") or option.startswith("--time"): current_section = "BOLT printing options:" - elif prev_section != None: + elif prev_section is not None: current_section = prev_section continue diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index de9ba09..b3cf9f8 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -1452,10 +1452,7 @@ public: std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS); std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( *TheTriple, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), - 
std::unique_ptr<MCCodeEmitter>(MCE), *STI, - /* RelaxAll */ false, - /* IncrementalLinkerCompatible */ false, - /* DWARFMustBeAtTheEnd */ false)); + std::unique_ptr<MCCodeEmitter>(MCE), *STI)); return Streamer; } diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 035f68e..83a5484 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -2367,10 +2367,7 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), - std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, - /*RelaxAll=*/false, - /*IncrementalLinkerCompatible=*/false, - /*DWARFMustBeAtTheEnd=*/false)); + std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI)); Streamer->initSections(false, *STI); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index aaa0e1f..5dfef0b 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -263,6 +263,10 @@ static cl::opt<bool> CMOVConversionFlag("cmov-conversion", cl::ReallyHidden, cl::cat(BoltOptCategory)); +static cl::opt<bool> ShortenInstructions("shorten-instructions", + cl::desc("shorten instructions"), + cl::init(true), + cl::cat(BoltOptCategory)); } // namespace opts namespace llvm { @@ -378,7 +382,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { else if (opts::Hugify) Manager.registerPass(std::make_unique<HugePage>(NeverPrint)); - Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); + Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint), + opts::ShortenInstructions); Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint), !opts::KeepNops); diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp 
b/bolt/lib/Rewrite/DWARFRewriter.cpp index 1ec216b..ccb45f4 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -620,9 +620,10 @@ void DWARFRewriter::updateDebugInfo() { uint32_t CUIndex = 0; std::mutex AccessMutex; // Needs to be invoked in the same order as CUs are processed. - auto createRangeLocListAddressWriters = - [&](DWARFUnit &CU) -> DebugLocWriter * { + llvm::DenseMap<uint64_t, uint64_t> LocListWritersIndexByCU; + auto createRangeLocListAddressWriters = [&](DWARFUnit &CU) { std::lock_guard<std::mutex> Lock(AccessMutex); + const uint16_t DwarfVersion = CU.getVersion(); if (DwarfVersion >= 5) { auto AddrW = std::make_unique<DebugAddrWriterDwarf5>( @@ -641,7 +642,6 @@ void DWARFRewriter::updateDebugInfo() { RangeListsWritersByCU[*DWOId] = std::move(DWORangeListsSectionWriter); } AddressWritersByCU[CU.getOffset()] = std::move(AddrW); - } else { auto AddrW = std::make_unique<DebugAddrWriter>(&BC, CU.getAddressByteSize()); @@ -657,7 +657,7 @@ void DWARFRewriter::updateDebugInfo() { std::move(LegacyRangesSectionWriterByCU); } } - return LocListWritersByCU[CUIndex++].get(); + LocListWritersIndexByCU[CU.getOffset()] = CUIndex++; }; DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC, @@ -666,74 +666,68 @@ void DWARFRewriter::updateDebugInfo() { DWPState State; if (opts::WriteDWP) initDWPState(State); - auto processUnitDIE = [&](DWARFUnit *Unit, DIEBuilder *DIEBlder) { - // Check if the unit is a skeleton and we need special updates for it and - // its matching split/DWO CU. 
- std::optional<DWARFUnit *> SplitCU; + auto processSplitCU = [&](DWARFUnit &Unit, DWARFUnit &SplitCU, + DIEBuilder &DIEBlder, + DebugRangesSectionWriter &TempRangesSectionWriter, + DebugAddrWriter &AddressWriter) { + DIEBuilder DWODIEBuilder(BC, &(SplitCU).getContext(), DebugNamesTable, + &Unit); + DWODIEBuilder.buildDWOUnit(SplitCU); + std::string DWOName = ""; + std::optional<std::string> DwarfOutputPath = + opts::DwarfOutputPath.empty() + ? std::nullopt + : std::optional<std::string>(opts::DwarfOutputPath.c_str()); + { + std::lock_guard<std::mutex> Lock(AccessMutex); + DWOName = DIEBlder.updateDWONameCompDir( + *StrOffstsWriter, *StrWriter, Unit, DwarfOutputPath, std::nullopt); + } + DebugStrOffsetsWriter DWOStrOffstsWriter(BC); + DebugStrWriter DWOStrWriter((SplitCU).getContext(), true); + DWODIEBuilder.updateDWONameCompDirForTypes( + DWOStrOffstsWriter, DWOStrWriter, SplitCU, DwarfOutputPath, DWOName); + DebugLoclistWriter DebugLocDWoWriter(Unit, Unit.getVersion(), true, + AddressWriter); + + updateUnitDebugInfo(SplitCU, DWODIEBuilder, DebugLocDWoWriter, + TempRangesSectionWriter, AddressWriter); + DebugLocDWoWriter.finalize(DWODIEBuilder, + *DWODIEBuilder.getUnitDIEbyUnit(SplitCU)); + if (Unit.getVersion() >= 5) + TempRangesSectionWriter.finalizeSection(); + + emitDWOBuilder(DWOName, DWODIEBuilder, *this, SplitCU, Unit, State, + DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter, + GDBIndexSection); + }; + auto processMainBinaryCU = [&](DWARFUnit &Unit, DIEBuilder &DIEBlder) { + DebugAddrWriter &AddressWriter = + *AddressWritersByCU[Unit.getOffset()].get(); + DebugRangesSectionWriter &RangesSectionWriter = + Unit.getVersion() >= 5 ? 
*RangeListsSectionWriter.get() + : *LegacyRangesSectionWriter.get(); + DebugLocWriter &DebugLocWriter = + *LocListWritersByCU[LocListWritersIndexByCU[Unit.getOffset()]].get(); std::optional<uint64_t> RangesBase; - std::optional<uint64_t> DWOId = Unit->getDWOId(); + std::optional<DWARFUnit *> SplitCU; + std::optional<uint64_t> DWOId = Unit.getDWOId(); if (DWOId) SplitCU = BC.getDWOCU(*DWOId); - DebugLocWriter *DebugLocWriter = createRangeLocListAddressWriters(*Unit); - DebugRangesSectionWriter *RangesSectionWriter = - Unit->getVersion() >= 5 ? RangeListsSectionWriter.get() - : LegacyRangesSectionWriter.get(); - DebugAddrWriter *AddressWriter = - AddressWritersByCU[Unit->getOffset()].get(); - // Skipping CUs that failed to load. - if (SplitCU) { - DIEBuilder DWODIEBuilder(BC, &(*SplitCU)->getContext(), DebugNamesTable, - Unit); - DWODIEBuilder.buildDWOUnit(**SplitCU); - std::string DWOName = ""; - std::optional<std::string> DwarfOutputPath = - opts::DwarfOutputPath.empty() - ? std::nullopt - : std::optional<std::string>(opts::DwarfOutputPath.c_str()); - { - std::lock_guard<std::mutex> Lock(AccessMutex); - DWOName = DIEBlder->updateDWONameCompDir( - *StrOffstsWriter, *StrWriter, *Unit, DwarfOutputPath, std::nullopt); - } - DebugStrOffsetsWriter DWOStrOffstsWriter(BC); - DebugStrWriter DWOStrWriter((*SplitCU)->getContext(), true); - DWODIEBuilder.updateDWONameCompDirForTypes(DWOStrOffstsWriter, - DWOStrWriter, **SplitCU, - DwarfOutputPath, DWOName); - DebugLoclistWriter DebugLocDWoWriter(*Unit, Unit->getVersion(), true, - *AddressWriter); - DebugRangesSectionWriter *TempRangesSectionWriter = RangesSectionWriter; - if (Unit->getVersion() >= 5) { - TempRangesSectionWriter = RangeListsWritersByCU[*DWOId].get(); - } else { - TempRangesSectionWriter = LegacyRangesWritersByCU[*DWOId].get(); - RangesBase = RangesSectionWriter->getSectionOffset(); - } - - updateUnitDebugInfo(*(*SplitCU), DWODIEBuilder, DebugLocDWoWriter, - *TempRangesSectionWriter, *AddressWriter); - 
DebugLocDWoWriter.finalize(DWODIEBuilder, - *DWODIEBuilder.getUnitDIEbyUnit(**SplitCU)); - if (Unit->getVersion() >= 5) - TempRangesSectionWriter->finalizeSection(); - - emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State, - DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter, - GDBIndexSection); - } - - if (Unit->getVersion() >= 5) { - RangesBase = RangesSectionWriter->getSectionOffset() + + if (Unit.getVersion() >= 5) { + RangesBase = RangesSectionWriter.getSectionOffset() + getDWARF5RngListLocListHeaderSize(); - RangesSectionWriter->initSection(*Unit); - StrOffstsWriter->finalizeSection(*Unit, *DIEBlder); + RangesSectionWriter.initSection(Unit); + StrOffstsWriter->finalizeSection(Unit, DIEBlder); + } else if (SplitCU) { + RangesBase = LegacyRangesSectionWriter.get()->getSectionOffset(); } - updateUnitDebugInfo(*Unit, *DIEBlder, *DebugLocWriter, *RangesSectionWriter, - *AddressWriter, RangesBase); - DebugLocWriter->finalize(*DIEBlder, *DIEBlder->getUnitDIEbyUnit(*Unit)); - if (Unit->getVersion() >= 5) - RangesSectionWriter->finalizeSection(); + updateUnitDebugInfo(Unit, DIEBlder, DebugLocWriter, RangesSectionWriter, + AddressWriter, RangesBase); + DebugLocWriter.finalize(DIEBlder, *DIEBlder.getUnitDIEbyUnit(Unit)); + if (Unit.getVersion() >= 5) + RangesSectionWriter.finalizeSection(); }; DIEBuilder DIEBlder(BC, BC.DwCtx.get(), DebugNamesTable); @@ -751,8 +745,24 @@ void DWARFRewriter::updateDebugInfo() { CUPartitionVector PartVec = partitionCUs(*BC.DwCtx); for (std::vector<DWARFUnit *> &Vec : PartVec) { DIEBlder.buildCompileUnits(Vec); + for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) { + createRangeLocListAddressWriters(*CU); + std::optional<DWARFUnit *> SplitCU; + std::optional<uint64_t> DWOId = CU->getDWOId(); + if (DWOId) + SplitCU = BC.getDWOCU(*DWOId); + if (!SplitCU) + continue; + DebugAddrWriter &AddressWriter = + *AddressWritersByCU[CU->getOffset()].get(); + DebugRangesSectionWriter *TempRangesSectionWriter = + CU->getVersion() >= 5 ? 
RangeListsWritersByCU[*DWOId].get() + : LegacyRangesWritersByCU[*DWOId].get(); + processSplitCU(*CU, **SplitCU, DIEBlder, *TempRangesSectionWriter, + AddressWriter); + } for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) - processUnitDIE(CU, &DIEBlder); + processMainBinaryCU(*CU, DIEBlder); finalizeCompileUnits(DIEBlder, *Streamer, OffsetMap, DIEBlder.getProcessedCUs(), *FinalAddrWriter); } diff --git a/bolt/test/AArch64/update-debug-reloc.test b/bolt/test/AArch64/update-debug-reloc.test index d57f42a..dd83229 100644 --- a/bolt/test/AArch64/update-debug-reloc.test +++ b/bolt/test/AArch64/update-debug-reloc.test @@ -2,7 +2,7 @@ # update-debug-sections option. RUN: %clang %cflags -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe -RUN: llvm-bolt %t.exe -o %t --update-debug-sections +RUN: llvm-bolt %t.exe -o %t --update-debug-sections 2>&1 | FileCheck %s CHECK: BOLT-INFO: Target architecture: aarch64 CHECK-NOT: Reloc num: 10 diff --git a/bolt/test/AArch64/veneer-gold.s b/bolt/test/AArch64/veneer-gold.s index 3b3e34e..275febc 100644 --- a/bolt/test/AArch64/veneer-gold.s +++ b/bolt/test/AArch64/veneer-gold.s @@ -29,7 +29,7 @@ dummy: .type foo, %function foo: # CHECK: <foo>: -# CHECK-NEXT : {{.*}} bl {{.*}} <foo2> +# CHECK-NEXT: {{.*}} bl {{.*}} <foo2> bl .L2 ret .size foo, .-foo @@ -38,7 +38,7 @@ foo: .type foo2, %function foo2: # CHECK: <foo2>: -# CHECK-NEXT : {{.*}} bl {{.*}} <foo2> +# CHECK-NEXT: {{.*}} bl {{.*}} <foo2> bl .L2 ret .size foo2, .-foo2 diff --git a/bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test b/bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test index a4f5ee7..6c603ba 100644 --- a/bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test +++ b/bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test @@ -90,7 +90,7 @@ ; BOLT-DWP: DW_TAG_compile_unit ; BOLT-DWP: DW_AT_dwo_name ("main.dwo.dwo") ; BOLT-DWP: DW_TAG_type_unit -; BOLT-DW-NOT: DW_AT_dwo_name +; BOLT-DWP-NOT: DW_AT_dwo_name ; BOLT-DWP: Contribution size = 68, Format = 
DWARF32, Version = 5 ; BOLT-DWP-NEXT: "main" ; BOLT-DWP-NEXT: "int" diff --git a/bolt/test/X86/dwarf5-one-loclists-two-bases.test b/bolt/test/X86/dwarf5-one-loclists-two-bases.test index 873512a..f25f6c7 100644 --- a/bolt/test/X86/dwarf5-one-loclists-two-bases.test +++ b/bolt/test/X86/dwarf5-one-loclists-two-bases.test @@ -34,7 +34,7 @@ # POSTCHECK: version = 0x0005 # POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c) # POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c) -# POSTCHECK-EMPTY +# POSTCHECK-EMPTY: # POSTCHECK: DW_TAG_variable # POSTCHECK: DW_AT_location [DW_FORM_loclistx] # POSTCHECK-SAME: indexed (0x0) diff --git a/bolt/test/X86/dwarf5-two-loclists.test b/bolt/test/X86/dwarf5-two-loclists.test index 2ede02f3..a7c6351 100644 --- a/bolt/test/X86/dwarf5-two-loclists.test +++ b/bolt/test/X86/dwarf5-two-loclists.test @@ -45,7 +45,7 @@ # POSTCHECK: version = 0x0005 # POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c) # POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c) -# POSTCHECK-EMPTY +# POSTCHECK-EMPTY: # POSTCHECK: DW_TAG_variable # POSTCHECK: DW_AT_location [DW_FORM_loclistx] # POSTCHECK-SAME: indexed (0x0) diff --git a/bolt/test/X86/dwarf5-two-rnglists.test b/bolt/test/X86/dwarf5-two-rnglists.test index 17cdc76..98f2e34 100644 --- a/bolt/test/X86/dwarf5-two-rnglists.test +++ b/bolt/test/X86/dwarf5-two-rnglists.test @@ -52,7 +52,7 @@ # POSTCHECK-NEXT: DW_AT_addr_base [DW_FORM_sec_offset] (0x00000008) # POSTCHECK-NEXT: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c) # POSTCHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c) -# POSTCHECK-EMPTY +# POSTCHECK-EMPTY: # POSTCHECK: DW_TAG_subprogram # POSTCHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] # POSTCHECK-SAME: indexed (0x1) @@ -75,7 +75,7 @@ # POSTCHECK-NEXT: DW_AT_addr_base [DW_FORM_sec_offset] (0x00000030) # POSTCHECK-NEXT: DW_AT_loclists_base [DW_FORM_sec_offset] (0x00000045) # POSTCHECK-NEXT: DW_AT_rnglists_base 
[DW_FORM_sec_offset] (0x00000035) -# POSTCHECK-EMPTY +# POSTCHECK-EMPTY: # POSTCHECK: DW_TAG_subprogram # POSTCHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg index 05f41ff..4ee9ad0 100644 --- a/bolt/test/perf2bolt/lit.local.cfg +++ b/bolt/test/perf2bolt/lit.local.cfg @@ -1,4 +1,4 @@ import shutil -if shutil.which("perf") != None: +if shutil.which("perf") is not None: config.available_features.add("perf")
\ No newline at end of file diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake index 4d3af3a..2d2dcb9 100644 --- a/clang/cmake/caches/Fuchsia.cmake +++ b/clang/cmake/caches/Fuchsia.cmake @@ -67,9 +67,6 @@ set(_FUCHSIA_BOOTSTRAP_PASSTHROUGH SWIG_EXECUTABLE CMAKE_FIND_PACKAGE_PREFER_CONFIG CMAKE_SYSROOT - CMAKE_MODULE_LINKER_FLAGS - CMAKE_SHARED_LINKER_FLAGS - CMAKE_EXE_LINKER_FLAGS LLVM_WINSYSROOT LLVM_VFSOVERLAY ) diff --git a/clang/docs/MSVCCompatibility.rst b/clang/docs/MSVCCompatibility.rst index b248605..0b6fea5 100644 --- a/clang/docs/MSVCCompatibility.rst +++ b/clang/docs/MSVCCompatibility.rst @@ -154,3 +154,133 @@ a hint suggesting how to fix the problem. As of this writing, Clang is able to compile a simple ATL hello world application. There are still issues parsing WRL headers for modern Windows 8 apps, but they should be addressed soon. + +__forceinline behavior +====================== + +``__forceinline`` behaves like ``[[clang::always_inline]]``. +Inlining is always attempted regardless of optimization level. + +This differs from MSVC where ``__forceinline`` is only respected once inline expansion is enabled +which allows any function marked implicitly or explicitly ``inline`` or ``__forceinline`` to be expanded. +Therefore functions marked ``__forceinline`` will be expanded when the optimization level is ``/Od`` unlike +MSVC where ``__forceinline`` will not be expanded under ``/Od``. + +SIMD and instruction set intrinsic behavior +=========================================== + +Clang follows the GCC model for intrinsics and not the MSVC model. +There are currently no plans to support the MSVC model. + +MSVC intrinsics always emit the machine instruction the intrinsic models regardless of the compile time options specified. +For example ``__popcnt`` always emits the x86 popcnt instruction even if the compiler does not have the option enabled to emit popcnt on its own volition. 
+ +There are two common cases where code that compiles with MSVC will need reworking to build on clang. +Assume the examples are only built with `-msse2` so we do not have the intrinsics at compile time. + +.. code-block:: c++ + + unsigned PopCnt(unsigned v) { + if (HavePopCnt) + return __popcnt(v); + else + return GenericPopCnt(v); + } + +.. code-block:: c++ + + __m128 dot4_sse3(__m128 v0, __m128 v1) { + __m128 r = _mm_mul_ps(v0, v1); + r = _mm_hadd_ps(r, r); + r = _mm_hadd_ps(r, r); + return r; + } + +Clang expects that either you have compile time support for the target features, `-msse3` and `-mpopcnt`, you mark the function with the expected target feature or use runtime detection with an indirect call. + +.. code-block:: c++ + + __attribute__((__target__("sse3"))) __m128 dot4_sse3(__m128 v0, __m128 v1) { + __m128 r = _mm_mul_ps(v0, v1); + r = _mm_hadd_ps(r, r); + r = _mm_hadd_ps(r, r); + return r; + } + +The SSE3 dot product can be easily fixed by either building the translation unit with SSE3 support or using `__target__` to compile that specific function with SSE3 support. + +.. code-block:: c++ + + unsigned PopCnt(unsigned v) { + if (HavePopCnt) + return __popcnt(v); + else + return GenericPopCnt(v); + } + +The above ``PopCnt`` example must be changed to work with clang. If we mark the function with `__target__("popcnt")` then the compiler is free to emit popcnt at will which we do not want. While this isn't a concern in our small example it is a concern in larger functions with surrounding code around the intrinsics. Similar reasoning for compiling the translation unit with `-mpopcnt`. +We must split each branch into its own function that can be called indirectly instead of using the intrinsic directly. + +.. code-block:: c++ + + __attribute__((__target__("popcnt"))) unsigned hwPopCnt(unsigned v) { return __popcnt(v); } + unsigned (*PopCnt)(unsigned) = HavePopCnt ? hwPopCnt : GenericPopCnt; + +.. 
code-block:: c++ + + __attribute__((__target__("popcnt"))) unsigned hwPopCnt(unsigned v) { return __popcnt(v); } + unsigned PopCnt(unsigned v) { + if (HavePopCnt) + return hwPopCnt(v); + else + return GenericPopCnt(v); + } + +In the above example ``hwPopCnt`` will not be inlined into ``PopCnt`` since ``PopCnt`` doesn't have the popcnt target feature. +With a larger function that does real work the function call overhead is negligible. However in our popcnt example there is the function call +overhead. There is no analog for this specific MSVC behavior in clang. + +For clang we effectively have to create the dispatch function ourselves to each specific implementation. + +SIMD vector types +================= + +Clang's simd vector types are builtin types and not user defined types as in MSVC. This does have some observable behavior changes. +We will look at the x86 `__m128` type for the examples below but the statements apply to all vector types including ARM's `float32x4_t`. + +There are no members that can be accessed on the vector types. Vector types are not structs in clang. +You cannot use ``__m128.m128_f32[0]`` to access the first element of the `__m128`. +This also means struct initialization like ``__m128{ { 0.0f, 0.0f, 0.0f, 0.0f } }`` will not compile with clang. + +Since vector types are builtin types, clang implements operators on them natively. + +.. code-block:: c++ + + #ifdef _MSC_VER + __m128 operator+(__m128 a, __m128 b) { return _mm_add_ps(a, b); } + #endif + +The above code will fail to compile since overloaded 'operator+' must have at least one parameter of class or enumeration type. +You will need to fix such code to have the check ``#if defined(_MSC_VER) && !defined(__clang__)``. + +Since `__m128` is not a class type in clang any overloads after a template definition will not be considered. + +..
code-block:: c++ + + template<class T> + void foo(T) {} + + template<class T> + void bar(T t) { + foo(t); + } + + void foo(__m128) {} + + int main() { + bar(_mm_setzero_ps()); + } + +With MSVC ``foo(__m128)`` will be selected but with clang ``foo<__m128>()`` will be selected since on clang `__m128` is a builtin type. + +In general the takeaway is `__m128` is a builtin type on clang while a class type on MSVC. diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7ac6ed9..4638b91 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -285,6 +285,8 @@ C++2c Feature Support - Implemented `P2963R3 Ordering of constraints involving fold expressions <https://wg21.link/P2963R3>`_. +- Implemented `P3034R1 Module Declarations Shouldn’t be Macros <https://wg21.link/P3034R1>`_. + Resolutions to C++ Defect Reports ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -750,9 +752,6 @@ Improvements to Clang's time-trace - Clang now specifies that using ``auto`` in a lambda parameter is a C++14 extension when appropriate. (`#46059: <https://github.com/llvm/llvm-project/issues/46059>`_). -- Clang now adds source file infomation for template instantiations as ``event["args"]["filename"]``. This - added behind an option ``-ftime-trace-verbose``. This is expected to increase the size of trace by 2-3 times. 
- Improvements to Coverage Mapping -------------------------------- diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 12d7b8c..08ece01 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -952,6 +952,11 @@ def warn_module_conflict : Warning< InGroup<ModuleConflict>; // C++20 modules +def err_module_decl_cannot_be_macros : Error< + "the module name in a module%select{| partition}0 declaration cannot contain " + "an object-like macro %1">; +def err_unxepected_paren_in_module_decl : Error< + "unexpected '(' after the module name in a module%select{| partition}0 declaration">; def err_header_import_semi_in_macro : Error< "semicolon terminating header import declaration cannot be produced " "by a macro">; diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index ae9ebd9..f40f74d 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsModulesImport : 1; + // True if this is the 'module' contextual keyword. + LLVM_PREFERRED_TYPE(bool) + unsigned IsModulesDecl : 1; + // True if this is a mangled OpenMP variant name. LLVM_PREFERRED_TYPE(bool) unsigned IsMangledOpenMPVariantName : 1; @@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsFinal : 1; - // 22 bits left in a 64-bit word. + // 21 bits left in a 64-bit word. // Managed by the language front-end. 
void *FETokenInfo = nullptr; @@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), - IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), - IsRestrictExpansion(false), IsFinal(false) {} + IsModulesDecl(false), IsMangledOpenMPVariantName(false), + IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {} public: IdentifierInfo(const IdentifierInfo &) = delete; @@ -520,6 +524,18 @@ public: RecomputeNeedsHandleIdentifier(); } + /// Determine whether this is the contextual keyword \c module. + bool isModulesDeclaration() const { return IsModulesDecl; } + + /// Set whether this identifier is the contextual keyword \c module. + void setModulesDeclaration(bool I) { + IsModulesDecl = I; + if (I) + NeedsHandleIdentifier = true; + else + RecomputeNeedsHandleIdentifier(); + } + /// Determine whether this is the mangled name of an OpenMP variant. bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } @@ -740,6 +756,8 @@ public: // If this is the 'import' contextual keyword, mark it as such. if (Name == "import") II->setModulesImport(true); + else if (Name == "module") + II->setModulesDeclaration(true); return *II; } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 7f4912b..8db18c0 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -1003,6 +1003,9 @@ ANNOTATION(module_include) ANNOTATION(module_begin) ANNOTATION(module_end) +// Annotations for C++, Clang and Objective-C named modules. +ANNOTATION(module_name) + // Annotation for a header_name token that has been looked up and transformed // into the name of a header unit. 
ANNOTATION(header_unit) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9c6cebd..6046b0c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3998,10 +3998,6 @@ def ftime_trace_granularity_EQ : Joined<["-"], "ftime-trace-granularity=">, Grou HelpText<"Minimum time granularity (in microseconds) traced by time profiler">, Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, MarshallingInfoInt<FrontendOpts<"TimeTraceGranularity">, "500u">; -def ftime_trace_verbose : Joined<["-"], "ftime-trace-verbose">, Group<f_Group>, - HelpText<"Make time trace capture verbose event details (e.g. source filenames). This can increase the size of the output by 2-3 times">, - Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, - MarshallingInfoFlag<FrontendOpts<"TimeTraceVerbose">>; def ftime_trace_EQ : Joined<["-"], "ftime-trace=">, Group<f_Group>, HelpText<"Similar to -ftime-trace. Specify the JSON file or a directory which will contain the JSON file">, Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, @@ -5553,6 +5549,10 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">, MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>; def pipe : Flag<["-", "--"], "pipe">, HelpText<"Use pipes between commands, when possible">; +// Facebook T92898286 +def post_link_optimize : Flag<["--"], "post-link-optimize">, + HelpText<"Apply post-link optimizations using BOLT">; +// End Facebook T92898286 def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">; def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 8241925..5e5034f 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -580,11 +580,6 @@ public: /// Minimum time granularity (in 
microseconds) traced by time profiler. unsigned TimeTraceGranularity; - /// Make time trace capture verbose event details (e.g. source filenames). - /// This can increase the size of the output by 2-3 times. - LLVM_PREFERRED_TYPE(bool) - unsigned TimeTraceVerbose : 1; - /// Path which stores the output files for -ftime-trace std::string TimeTracePath; @@ -606,8 +601,7 @@ public: EmitSymbolGraph(false), EmitExtensionSymbolGraphs(false), EmitSymbolGraphSymbolLabelsForTesting(false), EmitPrettySymbolGraphs(false), GenReducedBMI(false), - UseClangIRPipeline(false), TimeTraceGranularity(500), - TimeTraceVerbose(false) {} + UseClangIRPipeline(false), TimeTraceGranularity(500) {} /// getInputKindForExtension - Return the appropriate input kind for a file /// extension. For example, "c" would return Language::C. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index fc7d005..56aef99 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -615,10 +615,6 @@ private: ModuleDeclSeq ModuleDeclState; - /// Whether the module import expects an identifier next. Otherwise, - /// it expects a '.' or ';'. - bool ModuleImportExpectsIdentifier = false; - /// The identifier and source location of the currently-active /// \#pragma clang arc_cf_code_audited begin. std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; @@ -1744,11 +1740,14 @@ public: /// Lex a token, forming a header-name token if possible. bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); + /// Lex a module name or a partition name. + bool LexModuleName(Token &Result, bool IsImport); + /// Lex the parameters for an #embed directive, returns nullopt on error. 
std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current, bool ForHasEmbed); - bool LexAfterModuleImport(Token &Result); + bool LexAfterModuleDecl(Token &Result); void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); void makeModuleVisible(Module *M, SourceLocation Loc); @@ -3039,6 +3038,9 @@ private: static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) { return P.LexAfterModuleImport(Result); } + static bool CLK_LexAfterModuleDecl(Preprocessor &P, Token &Result) { + return P.LexAfterModuleDecl(Result); + } }; /// Abstract base class that describes a handler that will receive @@ -3071,6 +3073,77 @@ struct EmbedAnnotationData { /// Registry of pragma handlers added by plugins using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; +/// Represents module or partition name token sequence. +/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// partition-name: [C++20] +/// : module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +/// +/// This class can only be created by the preprocessor and guarantees that the +/// two source arrays are contiguous in memory and only contain 3 kinds of +/// tokens (identifier, '.' and ':'). And only available when the preprocessor +/// returns annot_module_name token. +/// +/// For example: +/// +/// export module m.n:c.d +/// +/// The module name array has 3 tokens ['m', '.', 'n']. +/// The partition name array has 4 tokens [':', 'c', '.', 'd']. +/// +/// When import a partition in a named module fragment (Eg. import :part1;), +/// the module name array will be empty, and the partition name array has 2 +/// tokens. +/// +/// When we meet a private-module-fragment (Eg. module :private;), preprocessor +/// will not return an annot_module_name token, but will return 2 separate tokens +/// [':', 'kw_private'].
+ +class ModuleNameInfo { + friend class Preprocessor; + ArrayRef<Token> ModuleName; + ArrayRef<Token> PartitionName; + + ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex); + +public: + /// Return the contiguous token array. + ArrayRef<Token> getTokens() const { + if (ModuleName.empty()) + return PartitionName; + if (PartitionName.empty()) + return ModuleName; + return ArrayRef(ModuleName.begin(), PartitionName.end()); + } + bool hasModuleName() const { return !ModuleName.empty(); } + bool hasPartitionName() const { return !PartitionName.empty(); } + ArrayRef<Token> getModuleName() const { return ModuleName; } + ArrayRef<Token> getPartitionName() const { return PartitionName; } + Token getColonToken() const { + assert(hasPartitionName() && "Do not have a partition name"); + return getPartitionName().front(); + } + + /// Under the standard C++ Modules, the dot is just part of the module name, + /// and not a real hierarchy separator. Flatten such module names now. + std::string getFlatName() const; + + /// Build a module id path from the contiguous token array, both include + /// module name and partition name. + void getModuleIdPath( + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const; + + /// Build a module id path from \param ModuleName. 
+ static void getModuleIdPath( + ArrayRef<Token> ModuleName, + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path); +}; + } // namespace clang #endif // LLVM_CLANG_LEX_PREPROCESSOR_H diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index 4f29fb7..2be3ad39 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -235,6 +235,9 @@ public: assert(isAnnotation() && "Used AnnotVal on non-annotation token"); return PtrData; } + template <class T> T getAnnotationValueAs() const { + return static_cast<T>(getAnnotationValue()); + } void setAnnotationValue(void *val) { assert(isAnnotation() && "Used AnnotVal on non-annotation token"); PtrData = val; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 93e60be..afcdacf0 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3876,7 +3876,7 @@ private: } bool ParseModuleName( - SourceLocation UseLoc, + SourceLocation UseLoc, ArrayRef<Token> ModuleName, SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path, bool IsImport); diff --git a/clang/lib/AST/Interp/Boolean.h b/clang/lib/AST/Interp/Boolean.h index 1bfb26b..23f7286 100644 --- a/clang/lib/AST/Interp/Boolean.h +++ b/clang/lib/AST/Interp/Boolean.h @@ -56,7 +56,7 @@ class Boolean final { APSInt toAPSInt(unsigned NumBits) const { return APSInt(toAPSInt().zextOrTrunc(NumBits), true); } - APValue toAPValue() const { return APValue(toAPSInt()); } + APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); } Boolean toUnsigned() const { return *this; } diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp index c6c6275..867284e 100644 --- a/clang/lib/AST/Interp/Disasm.cpp +++ b/clang/lib/AST/Interp/Disasm.cpp @@ -366,9 +366,9 @@ LLVM_DUMP_METHOD void EvaluationResult::dump() const { OS << "LValue: "; if (const auto *P = std::get_if<Pointer>(&Value)) - P->toAPValue().printPretty(OS, 
ASTCtx, SourceType); + P->toAPValue(ASTCtx).printPretty(OS, ASTCtx, SourceType); else if (const auto *FP = std::get_if<FunctionPointer>(&Value)) // Nope - FP->toAPValue().printPretty(OS, ASTCtx, SourceType); + FP->toAPValue(ASTCtx).printPretty(OS, ASTCtx, SourceType); OS << "\n"; break; } diff --git a/clang/lib/AST/Interp/EvalEmitter.cpp b/clang/lib/AST/Interp/EvalEmitter.cpp index 59e7868..0853653 100644 --- a/clang/lib/AST/Interp/EvalEmitter.cpp +++ b/clang/lib/AST/Interp/EvalEmitter.cpp @@ -145,7 +145,7 @@ template <PrimType OpType> bool EvalEmitter::emitRet(const SourceInfo &Info) { return false; using T = typename PrimConv<OpType>::T; - EvalResult.setValue(S.Stk.pop<T>().toAPValue()); + EvalResult.setValue(S.Stk.pop<T>().toAPValue(Ctx.getASTContext())); return true; } @@ -169,7 +169,9 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) { return false; // Never allow reading from a non-const pointer, unless the memory // has been created in this evaluation. - if (!Ptr.isConst() && Ptr.block()->getEvalID() != Ctx.getEvalID()) + if (!Ptr.isZero() && Ptr.isBlockPointer() && + Ptr.block()->getEvalID() != Ctx.getEvalID() && + (!CheckLoad(S, OpPC, Ptr, AK_Read) || !Ptr.isConst())) return false; if (std::optional<APValue> V = @@ -179,7 +181,7 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) { return false; } } else { - EvalResult.setValue(Ptr.toAPValue()); + EvalResult.setValue(Ptr.toAPValue(Ctx.getASTContext())); } return true; @@ -283,7 +285,8 @@ void EvalEmitter::updateGlobalTemporaries() { APValue *Cached = Temp->getOrCreateValue(true); if (std::optional<PrimType> T = Ctx.classify(E->getType())) { - TYPE_SWITCH(*T, { *Cached = Ptr.deref<T>().toAPValue(); }); + TYPE_SWITCH( + *T, { *Cached = Ptr.deref<T>().toAPValue(Ctx.getASTContext()); }); } else { if (std::optional<APValue> APV = Ptr.toRValue(Ctx, Temp->getTemporaryExpr()->getType())) diff --git a/clang/lib/AST/Interp/EvaluationResult.cpp 
b/clang/lib/AST/Interp/EvaluationResult.cpp index 0bebfd4..1b25571 100644 --- a/clang/lib/AST/Interp/EvaluationResult.cpp +++ b/clang/lib/AST/Interp/EvaluationResult.cpp @@ -21,9 +21,9 @@ APValue EvaluationResult::toAPValue() const { case LValue: // Either a pointer or a function pointer. if (const auto *P = std::get_if<Pointer>(&Value)) - return P->toAPValue(); + return P->toAPValue(Ctx->getASTContext()); else if (const auto *FP = std::get_if<FunctionPointer>(&Value)) - return FP->toAPValue(); + return FP->toAPValue(Ctx->getASTContext()); else llvm_unreachable("Unhandled LValue type"); break; @@ -46,7 +46,7 @@ std::optional<APValue> EvaluationResult::toRValue() const { if (const auto *P = std::get_if<Pointer>(&Value)) return P->toRValue(*Ctx, getSourceType()); else if (const auto *FP = std::get_if<FunctionPointer>(&Value)) // Nope - return FP->toAPValue(); + return FP->toAPValue(Ctx->getASTContext()); llvm_unreachable("Unhandled lvalue kind"); } diff --git a/clang/lib/AST/Interp/Floating.h b/clang/lib/AST/Interp/Floating.h index e4ac76d..1144878 100644 --- a/clang/lib/AST/Interp/Floating.h +++ b/clang/lib/AST/Interp/Floating.h @@ -69,7 +69,7 @@ public: APSInt toAPSInt(unsigned NumBits = 0) const { return APSInt(F.bitcastToAPInt()); } - APValue toAPValue() const { return APValue(F); } + APValue toAPValue(const ASTContext &) const { return APValue(F); } void print(llvm::raw_ostream &OS) const { // Can't use APFloat::print() since it appends a newline. 
SmallVector<char, 16> Buffer; diff --git a/clang/lib/AST/Interp/FunctionPointer.h b/clang/lib/AST/Interp/FunctionPointer.h index fc3d7a4..0f2c6e5 100644 --- a/clang/lib/AST/Interp/FunctionPointer.h +++ b/clang/lib/AST/Interp/FunctionPointer.h @@ -40,7 +40,7 @@ public: return Func->getDecl()->isWeak(); } - APValue toAPValue() const { + APValue toAPValue(const ASTContext &) const { if (!Func) return APValue(static_cast<Expr *>(nullptr), CharUnits::Zero(), {}, /*OnePastTheEnd=*/false, /*IsNull=*/true); @@ -69,7 +69,7 @@ public: if (!Func) return "nullptr"; - return toAPValue().getAsString(Ctx, Func->getDecl()->getType()); + return toAPValue(Ctx).getAsString(Ctx, Func->getDecl()->getType()); } uint64_t getIntegerRepresentation() const { diff --git a/clang/lib/AST/Interp/Integral.h b/clang/lib/AST/Interp/Integral.h index db4cc9a..aafdd02 100644 --- a/clang/lib/AST/Interp/Integral.h +++ b/clang/lib/AST/Interp/Integral.h @@ -112,7 +112,7 @@ public: else return APSInt(toAPSInt().zextOrTrunc(NumBits), !Signed); } - APValue toAPValue() const { return APValue(toAPSInt()); } + APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); } Integral<Bits, false> toUnsigned() const { return Integral<Bits, false>(*this); diff --git a/clang/lib/AST/Interp/IntegralAP.h b/clang/lib/AST/Interp/IntegralAP.h index 7464f15..b8aa210 100644 --- a/clang/lib/AST/Interp/IntegralAP.h +++ b/clang/lib/AST/Interp/IntegralAP.h @@ -133,7 +133,7 @@ public: else return APSInt(V.zext(Bits), !Signed); } - APValue toAPValue() const { return APValue(toAPSInt()); } + APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); } bool isZero() const { return V.isZero(); } bool isPositive() const { return V.isNonNegative(); } diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index cd6fc60..6fcd90e 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -728,8 +728,8 @@ bool CheckDynamicMemoryAllocation(InterpState &S, 
CodePtr OpPC) { return true; const SourceInfo &E = S.Current->getSource(OpPC); - S.FFDiag(E, diag::note_constexpr_new); - return false; + S.CCEDiag(E, diag::note_constexpr_new); + return true; } bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray, diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index f86b787..b2581b5 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -39,8 +39,9 @@ namespace interp { using APSInt = llvm::APSInt; /// Convert a value to an APValue. -template <typename T> bool ReturnValue(const T &V, APValue &R) { - R = V.toAPValue(); +template <typename T> +bool ReturnValue(const InterpState &S, const T &V, APValue &R) { + R = V.toAPValue(S.getCtx()); return true; } @@ -286,7 +287,7 @@ bool Ret(InterpState &S, CodePtr &PC, APValue &Result) { } else { delete S.Current; S.Current = nullptr; - if (!ReturnValue<T>(Ret, Result)) + if (!ReturnValue<T>(S, Ret, Result)) return false; } return true; @@ -1318,7 +1319,7 @@ bool InitGlobalTemp(InterpState &S, CodePtr OpPC, uint32_t I, const Pointer &Ptr = S.P.getGlobal(I); const T Value = S.Stk.peek<T>(); - APValue APV = Value.toAPValue(); + APValue APV = Value.toAPValue(S.getCtx()); APValue *Cached = Temp->getOrCreateValue(true); *Cached = APV; diff --git a/clang/lib/AST/Interp/MemberPointer.cpp b/clang/lib/AST/Interp/MemberPointer.cpp index 96f6364..0c1b6ed 100644 --- a/clang/lib/AST/Interp/MemberPointer.cpp +++ b/clang/lib/AST/Interp/MemberPointer.cpp @@ -60,13 +60,13 @@ FunctionPointer MemberPointer::toFunctionPointer(const Context &Ctx) const { return FunctionPointer(Ctx.getProgram().getFunction(cast<FunctionDecl>(Dcl))); } -APValue MemberPointer::toAPValue() const { +APValue MemberPointer::toAPValue(const ASTContext &ASTCtx) const { if (isZero()) return APValue(static_cast<ValueDecl *>(nullptr), /*IsDerivedMember=*/false, /*Path=*/{}); if (hasBase()) - return Base.toAPValue(); + return Base.toAPValue(ASTCtx); return 
APValue(cast<ValueDecl>(getDecl()), /*IsDerivedMember=*/false, /*Path=*/{}); diff --git a/clang/lib/AST/Interp/MemberPointer.h b/clang/lib/AST/Interp/MemberPointer.h index f56dc53..2b3be12 100644 --- a/clang/lib/AST/Interp/MemberPointer.h +++ b/clang/lib/AST/Interp/MemberPointer.h @@ -80,7 +80,7 @@ public: return MemberPointer(Instance, this->Dcl, this->PtrOffset); } - APValue toAPValue() const; + APValue toAPValue(const ASTContext &) const; bool isZero() const { return Base.isZero() && !Dcl; } bool hasBase() const { return !Base.isZero(); } diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp index ff4da0f..229007c 100644 --- a/clang/lib/AST/Interp/Pointer.cpp +++ b/clang/lib/AST/Interp/Pointer.cpp @@ -16,6 +16,7 @@ #include "MemberPointer.h" #include "PrimType.h" #include "Record.h" +#include "clang/AST/RecordLayout.h" using namespace clang; using namespace clang::interp; @@ -119,7 +120,7 @@ void Pointer::operator=(Pointer &&P) { } } -APValue Pointer::toAPValue() const { +APValue Pointer::toAPValue(const ASTContext &ASTCtx) const { llvm::SmallVector<APValue::LValuePathEntry, 5> Path; if (isZero()) @@ -141,25 +142,38 @@ APValue Pointer::toAPValue() const { else llvm_unreachable("Invalid allocation type"); - if (isDummy() || isUnknownSizeArray() || Desc->asExpr()) + if (isUnknownSizeArray() || Desc->asExpr()) return APValue(Base, CharUnits::Zero(), Path, /*IsOnePastEnd=*/isOnePastEnd(), /*IsNullPtr=*/false); - // TODO: compute the offset into the object. CharUnits Offset = CharUnits::Zero(); + auto getFieldOffset = [&](const FieldDecl *FD) -> CharUnits { + const ASTRecordLayout &Layout = ASTCtx.getASTRecordLayout(FD->getParent()); + unsigned FieldIndex = FD->getFieldIndex(); + return ASTCtx.toCharUnitsFromBits(Layout.getFieldOffset(FieldIndex)); + }; + // Build the path into the object. 
Pointer Ptr = *this; while (Ptr.isField() || Ptr.isArrayElement()) { if (Ptr.isArrayRoot()) { Path.push_back(APValue::LValuePathEntry( {Ptr.getFieldDesc()->asDecl(), /*IsVirtual=*/false})); + + if (const auto *FD = dyn_cast<FieldDecl>(Ptr.getFieldDesc()->asDecl())) + Offset += getFieldOffset(FD); + Ptr = Ptr.getBase(); } else if (Ptr.isArrayElement()) { + unsigned Index; if (Ptr.isOnePastEnd()) - Path.push_back(APValue::LValuePathEntry::ArrayIndex(Ptr.getArray().getNumElems())); + Index = Ptr.getArray().getNumElems(); else - Path.push_back(APValue::LValuePathEntry::ArrayIndex(Ptr.getIndex())); + Index = Ptr.getIndex(); + + Offset += (Index * ASTCtx.getTypeSizeInChars(Ptr.getType())); + Path.push_back(APValue::LValuePathEntry::ArrayIndex(Index)); Ptr = Ptr.getArray(); } else { // TODO: figure out if base is virtual @@ -170,12 +184,21 @@ APValue Pointer::toAPValue() const { if (const auto *BaseOrMember = Desc->asDecl()) { Path.push_back(APValue::LValuePathEntry({BaseOrMember, IsVirtual})); Ptr = Ptr.getBase(); + + if (const auto *FD = dyn_cast<FieldDecl>(BaseOrMember)) + Offset += getFieldOffset(FD); + continue; } llvm_unreachable("Invalid field type"); } } + // FIXME(perf): We compute the lvalue path above, but we can't supply it + // for dummy pointers (that causes crashes later in CheckConstantExpression). + if (isDummy()) + Path.clear(); + // We assemble the LValuePath starting from the innermost pointer to the // outermost one. SO in a.b.c, the first element in Path will refer to // the field 'c', while later code expects it to refer to 'a'. 
@@ -220,13 +243,19 @@ std::string Pointer::toDiagnosticString(const ASTContext &Ctx) const { if (isIntegralPointer()) return (Twine("&(") + Twine(asIntPointer().Value + Offset) + ")").str(); - return toAPValue().getAsString(Ctx, getType()); + return toAPValue(Ctx).getAsString(Ctx, getType()); } bool Pointer::isInitialized() const { if (isIntegralPointer()) return true; + if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) { + const GlobalInlineDescriptor &GD = + *reinterpret_cast<const GlobalInlineDescriptor *>(block()->rawData()); + return GD.InitState == GlobalInitState::Initialized; + } + assert(PointeeStorage.BS.Pointee && "Cannot check if null pointer was initialized"); const Descriptor *Desc = getFieldDesc(); @@ -249,12 +278,6 @@ bool Pointer::isInitialized() const { if (asBlockPointer().Base == 0) return true; - if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) { - const GlobalInlineDescriptor &GD = - *reinterpret_cast<const GlobalInlineDescriptor *>(block()->rawData()); - return GD.InitState == GlobalInitState::Initialized; - } - // Field has its bit in an inline descriptor. return getInlineDesc()->IsInitialized; } @@ -266,6 +289,13 @@ void Pointer::initialize() const { assert(PointeeStorage.BS.Pointee && "Cannot initialize null pointer"); const Descriptor *Desc = getFieldDesc(); + if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) { + GlobalInlineDescriptor &GD = *reinterpret_cast<GlobalInlineDescriptor *>( + asBlockPointer().Pointee->rawData()); + GD.InitState = GlobalInitState::Initialized; + return; + } + assert(Desc); if (Desc->isPrimitiveArray()) { // Primitive global arrays don't have an initmap. 
@@ -294,13 +324,6 @@ void Pointer::initialize() const { return; } - if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) { - GlobalInlineDescriptor &GD = *reinterpret_cast<GlobalInlineDescriptor *>( - asBlockPointer().Pointee->rawData()); - GD.InitState = GlobalInitState::Initialized; - return; - } - // Field has its bit in an inline descriptor. assert(PointeeStorage.BS.Base != 0 && "Only composite fields can be initialised"); @@ -344,10 +367,12 @@ bool Pointer::hasSameArray(const Pointer &A, const Pointer &B) { std::optional<APValue> Pointer::toRValue(const Context &Ctx, QualType ResultType) const { + const ASTContext &ASTCtx = Ctx.getASTContext(); assert(!ResultType.isNull()); // Method to recursively traverse composites. std::function<bool(QualType, const Pointer &, APValue &)> Composite; - Composite = [&Composite, &Ctx](QualType Ty, const Pointer &Ptr, APValue &R) { + Composite = [&Composite, &Ctx, &ASTCtx](QualType Ty, const Pointer &Ptr, + APValue &R) { if (const auto *AT = Ty->getAs<AtomicType>()) Ty = AT->getValueType(); @@ -358,7 +383,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx, // Primitive values. 
if (std::optional<PrimType> T = Ctx.classify(Ty)) { - TYPE_SWITCH(*T, R = Ptr.deref<T>().toAPValue()); + TYPE_SWITCH(*T, R = Ptr.deref<T>().toAPValue(ASTCtx)); return true; } @@ -375,7 +400,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx, QualType FieldTy = F.Decl->getType(); if (FP.isActive()) { if (std::optional<PrimType> T = Ctx.classify(FieldTy)) { - TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue()); + TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue(ASTCtx)); } else { Ok &= Composite(FieldTy, FP, Value); } @@ -398,7 +423,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx, APValue &Value = R.getStructField(I); if (std::optional<PrimType> T = Ctx.classify(FieldTy)) { - TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue()); + TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue(ASTCtx)); } else { Ok &= Composite(FieldTy, FP, Value); } @@ -436,7 +461,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx, APValue &Slot = R.getArrayInitializedElt(I); const Pointer &EP = Ptr.atIndex(I); if (std::optional<PrimType> T = Ctx.classify(ElemTy)) { - TYPE_SWITCH(*T, Slot = EP.deref<T>().toAPValue()); + TYPE_SWITCH(*T, Slot = EP.deref<T>().toAPValue(ASTCtx)); } else { Ok &= Composite(ElemTy, EP.narrow(), Slot); } @@ -475,7 +500,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx, Values.reserve(VT->getNumElements()); for (unsigned I = 0; I != VT->getNumElements(); ++I) { TYPE_SWITCH(ElemT, { - Values.push_back(Ptr.atIndex(I).deref<T>().toAPValue()); + Values.push_back(Ptr.atIndex(I).deref<T>().toAPValue(ASTCtx)); }); } @@ -493,11 +518,11 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx, // We can return these as rvalues, but we can't deref() them. if (isZero() || isIntegralPointer()) - return toAPValue(); + return toAPValue(ASTCtx); // Just load primitive types. 
if (std::optional<PrimType> T = Ctx.classify(ResultType)) { - TYPE_SWITCH(*T, return this->deref<T>().toAPValue()); + TYPE_SWITCH(*T, return this->deref<T>().toAPValue(ASTCtx)); } // Return the composite type. diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h index 972f55a..7fa6a32 100644 --- a/clang/lib/AST/Interp/Pointer.h +++ b/clang/lib/AST/Interp/Pointer.h @@ -118,7 +118,7 @@ public: bool operator!=(const Pointer &P) const { return !(P == *this); } /// Converts the pointer to an APValue. - APValue toAPValue() const; + APValue toAPValue(const ASTContext &ASTCtx) const; /// Converts the pointer to a string usable in diagnostics. std::string toDiagnosticString(const ASTContext &Ctx) const; diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 4f7ccaf..97d8302 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -322,8 +322,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { if (LangOpts.IEEE128) AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this); - // Add the 'import' contextual keyword. + // Add the 'import' and 'module' contextual keyword. get("import").setModulesImport(true); + get("module").setModulesDeclaration(true); } /// Checks if the specified token kind represents a keyword in the diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f7b987b..71cdaa1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1077,33 +1077,6 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, if (JA.isOffloading(Action::OFK_HIP)) getToolChain().AddHIPIncludeArgs(Args, CmdArgs); - // If we are compiling for a GPU target we want to override the system headers - // with ones created by the 'libc' project if present. 
- if (!Args.hasArg(options::OPT_nostdinc) && - !Args.hasArg(options::OPT_nogpuinc) && - !Args.hasArg(options::OPT_nobuiltininc)) { - // Without an offloading language we will include these headers directly. - // Offloading languages will instead only use the declarations stored in - // the resource directory at clang/lib/Headers/llvm_libc_wrappers. - if ((getToolChain().getTriple().isNVPTX() || - getToolChain().getTriple().isAMDGCN()) && - C.getActiveOffloadKinds() == Action::OFK_None) { - SmallString<128> P(llvm::sys::path::parent_path(D.Dir)); - llvm::sys::path::append(P, "include"); - llvm::sys::path::append(P, getToolChain().getTripleString()); - CmdArgs.push_back("-internal-isystem"); - CmdArgs.push_back(Args.MakeArgString(P)); - } else if (C.getActiveOffloadKinds() == Action::OFK_OpenMP) { - // TODO: CUDA / HIP include their own headers for some common functions - // implemented here. We'll need to clean those up so they do not conflict. - SmallString<128> P(D.ResourceDir); - llvm::sys::path::append(P, "include"); - llvm::sys::path::append(P, "llvm_libc_wrappers"); - CmdArgs.push_back("-internal-isystem"); - CmdArgs.push_back(Args.MakeArgString(P)); - } - } - // If we are offloading to a target via OpenMP we need to include the // openmp_wrappers folder which contains alternative system headers. if (JA.isDeviceOffloading(Action::OFK_OpenMP) && @@ -1276,6 +1249,35 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, }); } + // If we are compiling for a GPU target we want to override the system headers + // with ones created by the 'libc' project if present. + // TODO: This should be moved to `AddClangSystemIncludeArgs` by passing the + // OffloadKind as an argument. + if (!Args.hasArg(options::OPT_nostdinc) && + !Args.hasArg(options::OPT_nogpuinc) && + !Args.hasArg(options::OPT_nobuiltininc)) { + // Without an offloading language we will include these headers directly. 
+ // Offloading languages will instead only use the declarations stored in + // the resource directory at clang/lib/Headers/llvm_libc_wrappers. + if ((getToolChain().getTriple().isNVPTX() || + getToolChain().getTriple().isAMDGCN()) && + C.getActiveOffloadKinds() == Action::OFK_None) { + SmallString<128> P(llvm::sys::path::parent_path(D.Dir)); + llvm::sys::path::append(P, "include"); + llvm::sys::path::append(P, getToolChain().getTripleString()); + CmdArgs.push_back("-internal-isystem"); + CmdArgs.push_back(Args.MakeArgString(P)); + } else if (C.getActiveOffloadKinds() == Action::OFK_OpenMP) { + // TODO: CUDA / HIP include their own headers for some common functions + // implemented here. We'll need to clean those up so they do not conflict. + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "include"); + llvm::sys::path::append(P, "llvm_libc_wrappers"); + CmdArgs.push_back("-internal-isystem"); + CmdArgs.push_back(Args.MakeArgString(P)); + } + } + // Add system include arguments for all targets but IAMCU. 
if (!IsIAMCU) forAllAssociatedToolChains(C, JA, getToolChain(), @@ -6757,7 +6759,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (const char *Name = C.getTimeTraceFile(&JA)) { CmdArgs.push_back(Args.MakeArgString("-ftime-trace=" + Twine(Name))); Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_granularity_EQ); - Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_verbose); } if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) { diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 52c2ee9..ff20deb 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -672,12 +672,41 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } + // Facebook T92898286 + if (Args.hasArg(options::OPT_post_link_optimize)) + CmdArgs.push_back("-q"); + // End Facebook T92898286 + Args.AddAllArgs(CmdArgs, options::OPT_T); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, Output)); + // Facebook T92898286 + if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename()) + return; + + const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv")); + ArgStringList MoveCmdArgs; + MoveCmdArgs.push_back(Output.getFilename()); + const char *PreBoltBin = + Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt"); + MoveCmdArgs.push_back(PreBoltBin); + C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), + MvExec, MoveCmdArgs, std::nullopt)); + + ArgStringList BoltCmdArgs; + const char *BoltExec = + Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt")); + BoltCmdArgs.push_back(PreBoltBin); + BoltCmdArgs.push_back("-reorder-blocks=reverse"); + BoltCmdArgs.push_back("-update-debug-sections"); + BoltCmdArgs.push_back("-o"); + BoltCmdArgs.push_back(Output.getFilename()); + 
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), + BoltExec, BoltCmdArgs, std::nullopt)); + // End Facebook T92898286 } void tools::gnutools::Assembler::ConstructJob(Compilation &C, diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index b6d6e52..db66911 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -372,10 +372,6 @@ private: OpeningParen.Previous->is(tok::kw__Generic)) { Contexts.back().ContextType = Context::C11GenericSelection; Contexts.back().IsExpression = true; - } else if (Line.InPPDirective && - (!OpeningParen.Previous || - OpeningParen.Previous->isNot(tok::identifier))) { - Contexts.back().IsExpression = true; } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. Contexts.back().IsExpression = false; @@ -388,7 +384,20 @@ private: OpeningParen.Previous->MatchingParen->isOneOf( TT_ObjCBlockLParen, TT_FunctionTypeLParen)) { Contexts.back().IsExpression = false; - } else if (!Line.MustBeDeclaration && !Line.InPPDirective) { + } else if (Line.InPPDirective) { + auto IsExpr = [&OpeningParen] { + const auto *Tok = OpeningParen.Previous; + if (!Tok || Tok->isNot(tok::identifier)) + return true; + Tok = Tok->Previous; + while (Tok && Tok->endsSequence(tok::coloncolon, tok::identifier)) { + assert(Tok->Previous); + Tok = Tok->Previous->Previous; + } + return !Tok || !Tok->Tok.getIdentifierInfo(); + }; + Contexts.back().IsExpression = IsExpr(); + } else if (!Line.MustBeDeclaration) { bool IsForOrCatch = OpeningParen.Previous && OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch); diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 0592423..1fff88c 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -758,9 +758,10 @@ void 
PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, // These tokens are not expanded to anything and don't need whitespace before // them. if (Tok.is(tok::eof) || - (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && - !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && - !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed))) + (Tok.isAnnotation() && Tok.isNot(tok::annot_header_unit) && + Tok.isNot(tok::annot_module_begin) && Tok.isNot(tok::annot_module_end) && + Tok.isNot(tok::annot_module_name) && + Tok.isNot(tok::annot_repl_input_end) && Tok.isNot(tok::annot_embed))) return; // EmittedDirectiveOnThisLine takes priority over RequireSameLine. @@ -951,6 +952,11 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PP.Lex(Tok); IsStartOfLine = true; continue; + } else if (Tok.is(tok::annot_module_name)) { + auto *Info = static_cast<ModuleNameInfo *>(Tok.getAnnotationValue()); + *Callbacks->OS << Info->getFlatName(); + PP.Lex(Tok); + continue; } else if (Tok.is(tok::annot_header_unit)) { // This is a header-name that has been (effectively) converted into a // module-name. diff --git a/clang/lib/Headers/prfchwintrin.h b/clang/lib/Headers/prfchwintrin.h index 8a13784..eaea5f3 100644 --- a/clang/lib/Headers/prfchwintrin.h +++ b/clang/lib/Headers/prfchwintrin.h @@ -8,7 +8,7 @@ */ #if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED) -#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead." +#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> instead." 
#endif #ifndef __PRFCHWINTRIN_H diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 8221db4..c3a9039 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -122,7 +122,8 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, CurPPLexer = TheLexer; CurDirLookup = CurDir; CurLexerSubmodule = nullptr; - if (CurLexerCallback != CLK_LexAfterModuleImport) + if (CurLexerCallback != CLK_LexAfterModuleImport && + CurLexerCallback != CLK_LexAfterModuleDecl) CurLexerCallback = TheLexer->isDependencyDirectivesLexer() ? CLK_DependencyDirectivesLexer : CLK_Lexer; @@ -161,8 +162,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerCallback != CLK_LexAfterModuleImport) - CurLexerCallback = CLK_TokenLexer; + CurLexerCallback = CLK_TokenLexer; } /// EnterTokenStream - Add a "macro" context to the top of the include stack, @@ -216,7 +216,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerCallback != CLK_LexAfterModuleImport) + if (CurLexerCallback != CLK_LexAfterModuleImport && + CurLexerCallback != CLK_LexAfterModuleDecl) CurLexerCallback = CLK_TokenLexer; } diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 63e27e6..2726fae 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -860,9 +860,15 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { ModuleImportLoc = Identifier.getLocation(); NamedModuleImportPath.clear(); IsAtImport = true; - ModuleImportExpectsIdentifier = true; CurLexerCallback = CLK_LexAfterModuleImport; } + + if ((II.isModulesDeclaration() || Identifier.is(tok::kw_module)) && + !InMacroArgs && !DisableMacroExpansion && + (getLangOpts().CPlusPlusModules || getLangOpts().DebuggerSupport) && 
+ CurLexerCallback != CLK_CachingLexer) { + CurLexerCallback = CLK_LexAfterModuleDecl; + } return true; } @@ -905,6 +911,7 @@ void Preprocessor::Lex(Token &Result) { // This token is injected to represent the translation of '#include "a.h"' // into "import a.h;". Mimic the notional ';'. case tok::annot_module_include: + case tok::annot_repl_input_end: case tok::semi: TrackGMFState.handleSemi(); StdCXXImportSeqState.handleSemi(); @@ -919,12 +926,30 @@ void Preprocessor::Lex(Token &Result) { StdCXXImportSeqState.handleExport(); ModuleDeclState.handleExport(); break; - case tok::colon: - ModuleDeclState.handleColon(); - break; - case tok::period: - ModuleDeclState.handlePeriod(); + case tok::annot_module_name: { + auto *Info = static_cast<ModuleNameInfo *>(Result.getAnnotationValue()); + for (const auto &Tok : Info->getTokens()) { + switch (Tok.getKind()) { + case tok::identifier: + ModuleDeclState.handleIdentifier(Tok.getIdentifierInfo()); + break; + case tok::period: + ModuleDeclState.handlePeriod(); + break; + case tok::colon: + ModuleDeclState.handleColon(); + break; + default: + llvm_unreachable("Unexpected token in module name"); + } + } + if (ModuleDeclState.isModuleCandidate()) + break; + TrackGMFState.handleMisc(); + StdCXXImportSeqState.handleMisc(); + ModuleDeclState.handleMisc(); break; + } case tok::identifier: // Check "import" and "module" when there is no open bracket. The two // identifiers are not meaningful with open brackets. 
@@ -936,17 +961,17 @@ void Preprocessor::Lex(Token &Result) { ModuleImportLoc = Result.getLocation(); NamedModuleImportPath.clear(); IsAtImport = false; - ModuleImportExpectsIdentifier = true; CurLexerCallback = CLK_LexAfterModuleImport; } break; - } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { + } + if (Result.getIdentifierInfo()->isModulesDeclaration()) { TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); ModuleDeclState.handleModule(); + CurLexerCallback = CLK_LexAfterModuleDecl; break; } } - ModuleDeclState.handleIdentifier(Result.getIdentifierInfo()); if (ModuleDeclState.isModuleCandidate()) break; [[fallthrough]]; @@ -1121,6 +1146,151 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { } } +ModuleNameInfo::ModuleNameInfo(ArrayRef<Token> AnnotToks, + std::optional<unsigned> ColonIndex) { + assert(!AnnotToks.empty() && "Named module token cannot be empty."); + if (!ColonIndex.has_value()) + ColonIndex = AnnotToks.size(); + ModuleName = ArrayRef(AnnotToks.begin(), AnnotToks.begin() + *ColonIndex); + PartitionName = ArrayRef(AnnotToks.begin() + *ColonIndex, AnnotToks.end()); + assert(ModuleName.end() == PartitionName.begin()); +} + +std::string ModuleNameInfo::getFlatName() const { + std::string FlatModuleName; + for (auto &Tok : getTokens()) { + switch (Tok.getKind()) { + case tok::identifier: + FlatModuleName += Tok.getIdentifierInfo()->getName(); + break; + case tok::period: + FlatModuleName += '.'; + break; + case tok::colon: + FlatModuleName += ':'; + break; + default: + llvm_unreachable("Unexpected token in module name"); + } + } + return FlatModuleName; +} + +void ModuleNameInfo::getModuleIdPath( + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const { + return getModuleIdPath(getTokens(), Path); +} + +void ModuleNameInfo::getModuleIdPath( + ArrayRef<Token> ModuleName, + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) { + for (const auto &Tok : 
ModuleName) { + if (Tok.is(tok::identifier)) + Path.push_back( + std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation())); + } +} + +/// Lex a module name or a partition name. +/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// partition-name: [C++20] +/// : module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +bool Preprocessor::LexModuleName(Token &Result, bool IsImport) { + bool ExpectsIdentifier = true, IsLexingPartition = false; + SmallVector<Token, 8> ModuleName; + std::optional<unsigned> ColonTokIndex; + auto LexNextToken = [&](Token &Tok) { + if (IsImport) + Lex(Tok); + else + LexUnexpandedToken(Tok); + }; + + while (true) { + LexNextToken(Result); + if (ExpectsIdentifier && Result.is(tok::identifier)) { + auto *MI = getMacroInfo(Result.getIdentifierInfo()); + if (getLangOpts().CPlusPlusModules && !IsImport && MI && + MI->isObjectLike()) { + Diag(Result, diag::err_module_decl_cannot_be_macros) + << Result.getLocation() << IsLexingPartition + << Result.getIdentifierInfo(); + } + ModuleName.push_back(Result); + ExpectsIdentifier = false; + continue; + } + + if (!ExpectsIdentifier && Result.is(tok::period)) { + ModuleName.push_back(Result); + ExpectsIdentifier = true; + continue; + } + + // Module partition only allowed in C++20 Modules. + if (getLangOpts().CPlusPlusModules && Result.is(tok::colon)) { + // Handle the form like: import :P; + // If the token after ':' is not an identifier, this is a invalid module + // name. 
+ if (ModuleName.empty()) { + Token Tmp; + LexNextToken(Tmp); + EnterToken(Tmp, /*IsReiject=*/false); + // A private-module-fragment: + // export module :private; + if (!IsImport && Tmp.is(tok::kw_private)) + return true; + // import :N; + if (IsImport && Tmp.isNot(tok::identifier)) + return false; + } else if (!ExpectsIdentifier) { + ExpectsIdentifier = true; + } + IsLexingPartition = true; + ColonTokIndex = ModuleName.size(); + ModuleName.push_back(Result); + continue; + } + + // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a ( + // preprocessing token [...] + // + // We only emit diagnostic in the preprocessor, and in the parser we skip + // invalid tokens and recover from errors. + if (getLangOpts().CPlusPlusModules && !ExpectsIdentifier && + Result.is(tok::l_paren)) + Diag(Result, diag::err_unxepected_paren_in_module_decl) + << IsLexingPartition; + break; + } + + // Put the last token back to stream, it's not a valid part of module name. + // We lexed it unexpanded but it might be a valid macro expansion + Result.clearFlag(Token::DisableExpand); + auto ToksCopy = std::make_unique<Token[]>(1); + *ToksCopy.get() = Result; + EnterTokenStream(std::move(ToksCopy), 1, + /*DisableMacroExpansion=*/false, + /*IsReinject=*/false); + + if (ModuleName.empty()) + return false; + Result.startToken(); + Result.setKind(tok::annot_module_name); + Result.setLocation(ModuleName.front().getLocation()); + Result.setAnnotationEndLoc(ModuleName.back().getLocation()); + auto AnnotToks = ArrayRef(ModuleName).copy(getPreprocessorAllocator()); + ModuleNameInfo *Info = + new (getPreprocessorAllocator()) ModuleNameInfo(AnnotToks, ColonTokIndex); + Result.setAnnotationValue(static_cast<void *>(Info)); + return true; +} /// Lex a token following the 'import' contextual keyword. /// @@ -1145,6 +1315,17 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. 
recomputeCurLexerKind(); + // Allocate a holding buffer for a sequence of tokens and introduce it into + // the token stream. + auto EnterTokens = [this](ArrayRef<Token> Toks) { + auto ToksCopy = std::make_unique<Token[]>(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), + /*DisableMacroExpansion*/ true, /*IsReinject*/ false); + }; + + SmallVector<Token, 32> Suffix; + // Lex the next token. The header-name lexing rules are used at the start of // a pp-import. // @@ -1155,122 +1336,108 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { if (LexHeaderName(Result)) return true; - if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) { - std::string Name = ModuleDeclState.getPrimaryName().str(); - Name += ":"; - NamedModuleImportPath.push_back( - {getIdentifierInfo(Name), Result.getLocation()}); - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - } else { - Lex(Result); - } + // Check for a header-name. + if (Result.is(tok::header_name)) { + // Enter the header-name token into the token stream; a Lex action cannot + // both return a token and cache tokens (doing so would corrupt the token + // cache if the call to Lex comes from CachingLex / PeekAhead). + Suffix.push_back(Result); + + // Consume the pp-import-suffix and expand any macros in it now. We'll add + // it back into the token stream later. + CollectPpImportSuffix(Suffix); + if (Suffix.back().isNot(tok::semi)) { + // This is not a pp-import after all. + EnterTokens(Suffix); + return false; + } - // Allocate a holding buffer for a sequence of tokens and introduce it into - // the token stream. 
- auto EnterTokens = [this](ArrayRef<Token> Toks) { - auto ToksCopy = std::make_unique<Token[]>(Toks.size()); - std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); - EnterTokenStream(std::move(ToksCopy), Toks.size(), - /*DisableMacroExpansion*/ true, /*IsReinject*/ false); - }; + // C++2a [cpp.module]p1: + // The ';' preprocessing-token terminating a pp-import shall not have + // been produced by macro replacement. + SourceLocation SemiLoc = Suffix.back().getLocation(); + if (SemiLoc.isMacroID()) + Diag(SemiLoc, diag::err_header_import_semi_in_macro); + + // Reconstitute the import token. + Token ImportTok; + ImportTok.startToken(); + ImportTok.setKind(tok::kw_import); + ImportTok.setLocation(ModuleImportLoc); + ImportTok.setIdentifierInfo(getIdentifierInfo("import")); + ImportTok.setLength(6); + + auto Action = HandleHeaderIncludeOrImport( + /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); + switch (Action.Kind) { + case ImportAction::None: + break; - bool ImportingHeader = Result.is(tok::header_name); - // Check for a header-name. - SmallVector<Token, 32> Suffix; - if (ImportingHeader) { - // Enter the header-name token into the token stream; a Lex action cannot - // both return a token and cache tokens (doing so would corrupt the token - // cache if the call to Lex comes from CachingLex / PeekAhead). - Suffix.push_back(Result); + case ImportAction::ModuleBegin: + // Let the parser know we're textually entering the module. + Suffix.emplace_back(); + Suffix.back().startToken(); + Suffix.back().setKind(tok::annot_module_begin); + Suffix.back().setLocation(SemiLoc); + Suffix.back().setAnnotationEndLoc(SemiLoc); + Suffix.back().setAnnotationValue(Action.ModuleForHeader); + [[fallthrough]]; + + case ImportAction::ModuleImport: + case ImportAction::HeaderUnitImport: + case ImportAction::SkippedModuleImport: + // We chose to import (or textually enter) the file. Convert the + // header-name token into a header unit annotation token. 
+ Suffix[0].setKind(tok::annot_header_unit); + Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); + Suffix[0].setAnnotationValue(Action.ModuleForHeader); + // FIXME: Call the moduleImport callback? + break; + case ImportAction::Failure: + assert(TheModuleLoader.HadFatalFailure && + "This should be an early exit only to a fatal error"); + Result.setKind(tok::eof); + CurLexer->cutOffLexing(); + EnterTokens(Suffix); + return true; + } - // Consume the pp-import-suffix and expand any macros in it now. We'll add - // it back into the token stream later. - CollectPpImportSuffix(Suffix); - if (Suffix.back().isNot(tok::semi)) { - // This is not a pp-import after all. EnterTokens(Suffix); return false; } + } else { + Lex(Result); + } - // C++2a [cpp.module]p1: - // The ';' preprocessing-token terminating a pp-import shall not have - // been produced by macro replacement. - SourceLocation SemiLoc = Suffix.back().getLocation(); - if (SemiLoc.isMacroID()) - Diag(SemiLoc, diag::err_header_import_semi_in_macro); - - // Reconstitute the import token. - Token ImportTok; - ImportTok.startToken(); - ImportTok.setKind(tok::kw_import); - ImportTok.setLocation(ModuleImportLoc); - ImportTok.setIdentifierInfo(getIdentifierInfo("import")); - ImportTok.setLength(6); - - auto Action = HandleHeaderIncludeOrImport( - /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); - switch (Action.Kind) { - case ImportAction::None: - break; - - case ImportAction::ModuleBegin: - // Let the parser know we're textually entering the module. - Suffix.emplace_back(); - Suffix.back().startToken(); - Suffix.back().setKind(tok::annot_module_begin); - Suffix.back().setLocation(SemiLoc); - Suffix.back().setAnnotationEndLoc(SemiLoc); - Suffix.back().setAnnotationValue(Action.ModuleForHeader); - [[fallthrough]]; - - case ImportAction::ModuleImport: - case ImportAction::HeaderUnitImport: - case ImportAction::SkippedModuleImport: - // We chose to import (or textually enter) the file. 
Convert the - // header-name token into a header unit annotation token. - Suffix[0].setKind(tok::annot_header_unit); - Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); - Suffix[0].setAnnotationValue(Action.ModuleForHeader); - // FIXME: Call the moduleImport callback? - break; - case ImportAction::Failure: - assert(TheModuleLoader.HadFatalFailure && - "This should be an early exit only to a fatal error"); - Result.setKind(tok::eof); - CurLexer->cutOffLexing(); - EnterTokens(Suffix); + if (Result.isOneOf(tok::identifier, tok::colon)) { + EnterToken(Result, /*IsReinject=*/false); + if (!LexModuleName(Result, /*IsImport=*/true)) return true; + auto *Info = Result.getAnnotationValueAs<ModuleNameInfo *>(); + if (getLangOpts().CPlusPlusModules) { + // Under the standard C++ Modules, the dot is just part of the module + // name, and not a real hierarchy separator. Flatten such module names + // now. + // + // FIXME: Is this the right level to be performing this transformation? + std::string FlatModuleName; + if (Info->getTokens().front().is(tok::colon)) { + // Import a module partition allowed in C++20 Modules. + // We can import a partition in named module TU. + if (NamedModuleImportPath.empty() && ModuleDeclState.isNamedModule()) + FlatModuleName = llvm::Twine(ModuleDeclState.getPrimaryName()) + .concat(Info->getFlatName()) + .str(); + else + return true; + } else { + FlatModuleName = Info->getFlatName(); + } + NamedModuleImportPath.emplace_back(getIdentifierInfo(FlatModuleName), + Result.getLocation()); + } else { + Info->getModuleIdPath(NamedModuleImportPath); } - - EnterTokens(Suffix); - return false; - } - - // The token sequence - // - // import identifier (. identifier)* - // - // indicates a module import directive. We already saw the 'import' - // contextual keyword, so now we're looking for the identifiers. 
- if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { - // We expected to see an identifier here, and we did; continue handling - // identifiers. - NamedModuleImportPath.push_back( - std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); - ModuleImportExpectsIdentifier = false; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - - // If we're expecting a '.' or a ';', and we got a '.', then wait until we - // see the next identifier. (We can also see a '[[' that begins an - // attribute-specifier-seq here under the Standard C++ Modules.) - if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; } // If we didn't recognize a module name at all, this is not a (valid) import. @@ -1291,24 +1458,6 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { SemiLoc = Suffix.back().getLocation(); } - // Under the standard C++ Modules, the dot is just part of the module name, - // and not a real hierarchy separator. Flatten such module names now. - // - // FIXME: Is this the right level to be performing this transformation? - std::string FlatModuleName; - if (getLangOpts().CPlusPlusModules) { - for (auto &Piece : NamedModuleImportPath) { - // If the FlatModuleName ends with colon, it implies it is a partition. - if (!FlatModuleName.empty() && FlatModuleName.back() != ':') - FlatModuleName += "."; - FlatModuleName += Piece.first->getName(); - } - SourceLocation FirstPathLoc = NamedModuleImportPath[0].second; - NamedModuleImportPath.clear(); - NamedModuleImportPath.push_back( - std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); - } - Module *Imported = nullptr; // We don't/shouldn't load the standard c++20 modules when preprocessing. 
if (getLangOpts().Modules && !isInImportingCXXNamedModules()) { @@ -1330,6 +1479,33 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { return true; } +/// Lex a token following the 'module' contextual keyword. +/// +/// [cpp.module]/p2: +/// The pp-tokens, if any, of a pp-module shall be of the form: +/// pp-module-name pp-module-partition[opt] pp-tokens[opt] +/// +/// where the pp-tokens (if any) shall not begin with a ( preprocessing token +/// and the grammar non-terminals are defined as: +/// pp-module-name: +/// pp-module-name-qualifierp[opt] identifier +/// pp-module-partition: +/// : pp-module-name-qualifier[opt] identifier +/// pp-module-name-qualifier: +/// identifier . +/// pp-module-name-qualifier identifier . +/// No identifier in the pp-module-name or pp-module-partition shall currently +/// be defined as an object-like macro. +/// +/// [cpp.module]/p3: +/// Any preprocessing tokens after the module preprocessing token in the module +/// directive are processed just as in normal text. +bool Preprocessor::LexAfterModuleDecl(Token &Result) { + // Figure out what kind of lexer we actually have. 
+ recomputeCurLexerKind(); + return LexModuleName(Result, /*IsImport=*/false); +} + void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { CurSubmoduleState->VisibleModules.setVisible( M, Loc, [](Module *) {}, diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp index 865879d..cdb6369 100644 --- a/clang/lib/Lex/TokenConcatenation.cpp +++ b/clang/lib/Lex/TokenConcatenation.cpp @@ -160,6 +160,13 @@ static char GetFirstChar(const Preprocessor &PP, const Token &Tok) { bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const { + // If previous token is a module name, we need avoid concat it with current + // token, otherwise, there will has an extra space between 'M' and ';' for the + // following code: + // + // import M; + if (PrevTok.is(tok::annot_module_name)) + return false; // Conservatively assume that every annotation token that has a printable // form requires whitespace. if (PrevTok.isAnnotation()) @@ -190,6 +197,9 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, return true; ConcatInfo &= ~aci_avoid_equal; } + + if (Tok.is(tok::annot_module_name)) + return true; if (Tok.isAnnotation()) { // Modules annotation can show up when generated automatically for includes. assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin, diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 7ce9a9c..577527d 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -3958,7 +3958,13 @@ void Parser::ParseDeclarationSpecifiers( // We're done with the declaration-specifiers. 
goto DoneWithDeclSpec; - + case tok::annot_module_name: { + PP.EnterTokenStream( + Tok.getAnnotationValueAs<ModuleNameInfo *>()->getTokens(), + /*DisableMacroExpansion=*/true, /*IsReinject=*/false); + ConsumeAnyToken(); + [[fallthrough]]; + } // typedef-name case tok::kw___super: case tok::kw_decltype: diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 5ebe71e..afb2e1e 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -2511,18 +2511,28 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { } SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; - if (ParseModuleName(ModuleLoc, Path, /*IsImport*/ false)) + if (Tok.isNot(tok::annot_module_name)) { + Diag(Tok, diag::err_module_expected_ident) << /*IsImport=*/false; + SkipUntil(tok::semi, StopBeforeMatch); + return nullptr; + } + + auto *Info = Tok.getAnnotationValueAs<ModuleNameInfo *>(); + ConsumeAnnotationToken(); + if (ParseModuleName(ModuleLoc, Info->getModuleName(), Path, + /*IsImport=*/false)) return nullptr; // Parse the optional module-partition. SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Partition; - if (Tok.is(tok::colon)) { - SourceLocation ColonLoc = ConsumeToken(); + if (Info->hasPartitionName()) { + SourceLocation ColonLoc = Info->getColonToken().getLocation(); if (!getLangOpts().CPlusPlusModules) Diag(ColonLoc, diag::err_unsupported_module_partition) << SourceRange(ColonLoc, Partition.back().second); // Recover by ignoring the partition name. - else if (ParseModuleName(ModuleLoc, Partition, /*IsImport*/ false)) + else if (ParseModuleName(ModuleLoc, Info->getPartitionName(), Partition, + /*IsImport=*/false)) return nullptr; } @@ -2581,18 +2591,32 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, // This is a header import that the preprocessor mapped to a module import. 
HeaderUnit = reinterpret_cast<Module *>(Tok.getAnnotationValue()); ConsumeAnnotationToken(); - } else if (Tok.is(tok::colon)) { - SourceLocation ColonLoc = ConsumeToken(); - if (!getLangOpts().CPlusPlusModules) - Diag(ColonLoc, diag::err_unsupported_module_partition) - << SourceRange(ColonLoc, Path.back().second); - // Recover by leaving partition empty. - else if (ParseModuleName(ColonLoc, Path, /*IsImport*/ true)) - return nullptr; - else - IsPartition = true; } else { - if (ParseModuleName(ImportLoc, Path, /*IsImport*/ true)) + if (Tok.isNot(tok::annot_module_name)) { + if (Tok.is(tok::code_completion)) { + cutOffParsing(); + Actions.CodeCompletion().CodeCompleteModuleImport(ImportLoc, Path); + return nullptr; + } + Diag(Tok, diag::err_module_expected_ident) << /*IsImport=*/true; + SkipUntil(tok::semi, StopBeforeMatch); + return nullptr; + } + auto *Info = Tok.getAnnotationValueAs<ModuleNameInfo *>(); + ConsumeAnnotationToken(); + if (Info->hasPartitionName()) { + SourceLocation ColonLoc = Info->getColonToken().getLocation(); + if (!getLangOpts().CPlusPlusModules) + Diag(ColonLoc, diag::err_unsupported_module_partition) + << SourceRange(ColonLoc, Path.back().second); + // Recover by leaving partition empty. + else if (ParseModuleName(ColonLoc, Info->getPartitionName(), Path, + /*IsImport=*/true)) + return nullptr; + else + IsPartition = true; + } else if (ParseModuleName(ImportLoc, Info->getModuleName(), Path, + /*IsImport=*/true)) return nullptr; } @@ -2689,32 +2713,31 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, /// module-name-qualifier: /// module-name-qualifier[opt] identifier '.' bool Parser::ParseModuleName( - SourceLocation UseLoc, + SourceLocation UseLoc, ArrayRef<Token> ModuleName, SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path, bool IsImport) { - // Parse the module path. 
- while (true) { - if (!Tok.is(tok::identifier)) { - if (Tok.is(tok::code_completion)) { - cutOffParsing(); - Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path); - return true; - } - - Diag(Tok, diag::err_module_expected_ident) << IsImport; - SkipUntil(tok::semi); + ModuleNameInfo::getModuleIdPath(ModuleName, Path); + // Eg. import A.B. + if (ModuleName.back().isNot(tok::identifier)) { + if (Tok.is(tok::code_completion)) { + cutOffParsing(); + Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path); return true; } + Diag(ModuleName.back(), diag::err_module_expected_ident) << IsImport; + SkipUntil(tok::semi, StopBeforeMatch); + return true; + } - // Record this part of the module path. - Path.push_back(std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation())); - ConsumeToken(); - - if (Tok.isNot(tok::period)) - return false; - - ConsumeToken(); + // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a ( + // preprocessing token [...] + // + // Skip unitl ';' to recovery. 
+ if (getLangOpts().CPlusPlusModules && Tok.is(tok::l_paren)) { + SkipUntil(tok::semi, StopBeforeMatch); + return true; } + return false; } /// Try recover parser when module annotation appears where it must not diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 725b62db..a7bc674 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -3426,16 +3426,11 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, return true; llvm::TimeTraceScope TimeScope("InstantiateClass", [&]() { - llvm::TimeTraceMetadata M; - llvm::raw_string_ostream OS(M.Detail); + std::string Name; + llvm::raw_string_ostream OS(Name); Instantiation->getNameForDiagnostic(OS, getPrintingPolicy(), /*Qualified=*/true); - if (llvm::isTimeTraceVerbose()) { - auto Loc = SourceMgr.getExpansionLoc(Instantiation->getLocation()); - M.File = SourceMgr.getFilename(Loc); - M.Line = SourceMgr.getExpansionLineNumber(Loc); - } - return M; + return Name; }); Pattern = PatternDef; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index a12d2ef..97161fe 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -4966,16 +4966,11 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, } llvm::TimeTraceScope TimeScope("InstantiateFunction", [&]() { - llvm::TimeTraceMetadata M; - llvm::raw_string_ostream OS(M.Detail); + std::string Name; + llvm::raw_string_ostream OS(Name); Function->getNameForDiagnostic(OS, getPrintingPolicy(), /*Qualified=*/true); - if (llvm::isTimeTraceVerbose()) { - auto Loc = SourceMgr.getExpansionLoc(Function->getLocation()); - M.File = SourceMgr.getFilename(Loc); - M.Line = SourceMgr.getExpansionLineNumber(Loc); - } - return M; + return Name; }); // If we're performing recursive template instantiation, create our own diff --git 
a/clang/test/AST/Interp/codegen.cpp b/clang/test/AST/Interp/codegen.cpp new file mode 100644 index 0000000..8a0d070d --- /dev/null +++ b/clang/test/AST/Interp/codegen.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s + + +int arr[2]; +// CHECK: @pastEnd = constant ptr getelementptr (i8, ptr @arr, i64 8) +int &pastEnd = arr[2]; + +// CHECK: @F = constant ptr @arr, align 8 +int &F = arr[0]; + +struct S { + int a; + float c[3]; +}; + +// CHECK: @s = global %struct.S zeroinitializer, align 4 +S s; +// CHECK: @sp = constant ptr getelementptr (i8, ptr @s, i64 16), align 8 +float &sp = s.c[3]; diff --git a/clang/test/AST/Interp/cxx11.cpp b/clang/test/AST/Interp/cxx11.cpp index 92ab9b6..cf2dfba 100644 --- a/clang/test/AST/Interp/cxx11.cpp +++ b/clang/test/AST/Interp/cxx11.cpp @@ -152,3 +152,11 @@ void A::f(SortOrder order) { return; } } + +namespace FinalLtorDiags { + template<int*> struct A {}; // both-note {{template parameter is declared here}} + int k; + int *q = &k; // both-note {{declared here}} + A<q> c; // both-error {{non-type template argument of type 'int *' is not a constant expression}} \ + // both-note {{read of non-constexpr variable 'q' is not allowed in a constant expression}} +} diff --git a/clang/test/AST/Interp/new-delete.cpp b/clang/test/AST/Interp/new-delete.cpp index cb46426..7a85def 100644 --- a/clang/test/AST/Interp/new-delete.cpp +++ b/clang/test/AST/Interp/new-delete.cpp @@ -560,4 +560,9 @@ constexpr int a() { // both-error {{never produces a constant expression}} } static_assert(a() == 1, ""); // both-error {{not an integral constant expression}} \ // both-note {{in call to 'a()'}} + + +static_assert(true ? 
*new int : 4, ""); // both-error {{expression is not an integral constant expression}} \ + // both-note {{read of uninitialized object is not allowed in a constant expression}} + #endif diff --git a/clang/test/CXX/cpp/cpp.module/p2.cppm b/clang/test/CXX/cpp/cpp.module/p2.cppm new file mode 100644 index 0000000..966a88c --- /dev/null +++ b/clang/test/CXX/cpp/cpp.module/p2.cppm @@ -0,0 +1,88 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/C.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/D.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/E.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/F.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/G.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/H.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/I.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/J.cppm -triple x86_64-linux-gnu -verify + +//--- version.h +#ifndef VERSION_H +#define VERSION_H + +#define VERSION libv5 +#define A a +#define B b +#define C c +#define FUNC_LIKE(X) function_like_##X +#define ATTRS [[]] +#define SEMICOLON ; + +#endif // VERSION_H + +//--- A.cppm +module; +#include "version.h" +export module VERSION; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'VERSION'}} + +//--- B.cppm +module; +#include "version.h" +export module A.B; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{the module name in a module declaration cannot contain an object-like macro 'B'}} + +//--- C.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment 
introduced here}} +#include "version.h" +export module A.FUNC_LIKE(foo):C; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- D.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment introduced here}} +#include "version.h" +export module B.A.FUNC_LIKE(bar):C; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'B'}} \ + // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- E.cppm +module; +#include "version.h" +export module a.FUNC_LIKE:c; // OK, FUNC_LIKE would not be treated as a macro name. +// expected-no-diagnostics + +//--- F.cppm +module; +#include "version.h" +export module a.FUNC_LIKE:c ATTRS; // OK, FUNC_LIKE would not be treated as a macro name. +// expected-no-diagnostics + +//--- G.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment introduced here}} +#include "version.h" +export module A.FUNC_LIKE(B c:C ATTRS // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- H.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment introduced here}} +#include "version.h" +export module A.FUNC_LIKE(B,). 
c:C ATTRS // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- I.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment introduced here}} +#include "version.h" +export module A.FUNC_LIKE(B,) c:C ATTRS // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- J.cppm +module; +#include "version.h" +export module unexpanded : unexpanded ATTRS SEMICOLON // OK, ATTRS and SEMICOLON can be expanded. +// expected-no-diagnostics diff --git a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp index d71358cc..14bbc91 100644 --- a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp +++ b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp @@ -8,27 +8,19 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fmodule-file=x=%t/x.pcm %t/x.y.cppm -o %t/x.y.pcm // // Module implementation for unknown and known module. (The former is ill-formed.) -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M.cpp \ -// RUN: -DTEST=1 -DEXPORT= -DMODULE_NAME=z -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x=%t/x.pcm -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M.cpp \ -// RUN: -DTEST=2 -DEXPORT= -DMODULE_NAME=x +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M1.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x=%t/x.pcm -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M2.cpp // // Module interface for unknown and known module. (The latter is ill-formed due to // redefinition.) 
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=3 -DEXPORT=export -DMODULE_NAME=z -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=4 -DEXPORT=export -DMODULE_NAME=x +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M3.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M4.cpp // // Miscellaneous syntax. -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=7 -DEXPORT=export -DMODULE_NAME='z elderberry' -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=8 -DEXPORT=export -DMODULE_NAME='z [[]]' -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=9 -DEXPORT=export -DMODULE_NAME='z [[fancy]]' -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=10 -DEXPORT=export -DMODULE_NAME='z [[maybe_unused]]' +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M5.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M6.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M7.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M8.cpp //--- x.cppm export module x; @@ -38,17 +30,26 @@ int a, b; export module x.y; int c; -//--- M.cpp - -EXPORT module MODULE_NAME; -#if TEST == 7 -// expected-error@-2 {{expected ';'}} expected-error@-2 {{a type specifier is required}} -#elif TEST == 9 -// expected-warning@-4 {{unknown attribute 'fancy' ignored}} -#elif TEST == 10 -// expected-error-re@-6 {{'maybe_unused' attribute cannot be applied to a module{{$}}}} -#elif TEST == 1 -// expected-error@-8 {{module 'z' not found}} -#else -// expected-no-diagnostics -#endif +//--- M1.cpp +module z; // expected-error {{module 'z' not found}} + +//--- M2.cpp 
+module x; // expected-no-diagnostics + +//--- M3.cpp +export module z; // expected-no-diagnostics + +//--- M4.cpp +export module x; // expected-no-diagnostics + +//--- M5.cpp +export module z elderberry; // expected-error {{expected ';'}} expected-error {{a type specifier is required}} + +//--- M6.cpp +export module z [[]]; // expected-no-diagnostics + +//--- M7.cpp +export module z [[fancy]]; // expected-warning {{unknown attribute 'fancy' ignored}} + +//--- M8.cpp +export module z [[maybe_unused]]; // expected-error-re {{'maybe_unused' attribute cannot be applied to a module{{$}}}} diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm index 873e4c0..ecad4db 100644 --- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm +++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm @@ -6,10 +6,12 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fmodule-file=x=%t/x.pcm %t/x.y.cppm -o %t/x.y.pcm // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.b.cppm -o %t/a.b.pcm // -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test.cpp \ -// RUN: -DMODULE_NAME=z -DINTERFACE +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test-interface.cpp \ +// RUN: -DINTERFACE // RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm \ -// RUN: -fmodule-file=a.b=%t/a.b.pcm -verify %t/test.cpp -DMODULE_NAME=a.b +// RUN: -fmodule-file=a.b=%t/a.b.pcm -verify %t/test.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm \ +// RUN: -verify %t/test-module-not-found.cpp // RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test.x.cpp //--- x.cppm @@ -34,11 +36,8 @@ int use_2 = b; // ok int use_3 = c; // expected-error {{use of undeclared 
identifier 'c'}} //--- test.cpp -#ifdef INTERFACE -export module MODULE_NAME; -#else -module MODULE_NAME; -#endif +module; +module a.b; import x; @@ -51,6 +50,28 @@ import x.y; import x.; // expected-error {{expected a module name after 'import'}} import .x; // expected-error {{expected a module name after 'import'}} -import blarg; // expected-error {{module 'blarg' not found}} +int use_4 = c; // ok + + +//--- test-interface.cpp +module; +export module z; + +import x; + +import x [[]]; +import x [[foo]]; // expected-warning {{unknown attribute 'foo' ignored}} +import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applied to a module import}} +import x [[blarg::noreturn]]; // expected-warning {{unknown attribute 'noreturn' ignored}} + +import x.y; +import x.; // expected-error {{expected a module name after 'import'}} +import .x; // expected-error {{expected a module name after 'import'}} int use_4 = c; // ok + +//--- test-module-not-found.cpp +module; + +import blarg; // expected-error {{module 'blarg' not found}} + diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index 3c2b511..b1ae667 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -64,7 +64,7 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {} // CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" 
"target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-vaes" // CHECK-NOT: tune-cpu -// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx" +// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" // CHECK-NOT: tune-cpu // CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge" diff --git a/clang/test/Driver/ftime-trace-sections.cpp b/clang/test/Driver/ftime-trace-sections.cpp index da7109b..0c16052 100644 --- a/clang/test/Driver/ftime-trace-sections.cpp +++ b/clang/test/Driver/ftime-trace-sections.cpp @@ -1,5 +1,5 @@ // RUN: rm -rf %t && mkdir %t && cd %t -// RUN: %clangxx -S -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s +// RUN: %clangxx -S -ftime-trace -ftime-trace-granularity=0 -o out %s // RUN: %python %S/ftime-trace-sections.py < out.json template <typename T> diff --git a/clang/test/Driver/ftime-trace.cpp b/clang/test/Driver/ftime-trace.cpp index 60c5885..5fe63de 100644 --- a/clang/test/Driver/ftime-trace.cpp +++ b/clang/test/Driver/ftime-trace.cpp @@ -1,18 +1,18 @@ // RUN: rm -rf %t && mkdir -p %t && cd %t -// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s +// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace -ftime-trace-granularity=0 -o out %s // RUN: cat out.json \ // RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ // RUN: | FileCheck %s -// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=new-name.json -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s +// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=new-name.json -ftime-trace-granularity=0 -o out %s // 
RUN: cat new-name.json \ // RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ // RUN: | FileCheck %s // RUN: mkdir dir1 dir2 -// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=dir1 -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s +// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=dir1 -ftime-trace-granularity=0 -o out %s // RUN: cat dir1/out.json \ // RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ // RUN: | FileCheck %s -// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=dir2/ -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s +// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=dir2/ -ftime-trace-granularity=0 -o out %s // RUN: cat dir2/out.json \ // RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ // RUN: | FileCheck %s @@ -34,33 +34,32 @@ // RUN: mkdir d e f && cp %s d/a.cpp && touch d/b.c /// TODO: Support -fno-integrated-as. 
-// RUN: %clang -### -c -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose -fintegrated-as d/a.cpp -o e/a.o 2>&1 | FileCheck %s --check-prefix=COMPILE1 -// COMPILE1: -cc1{{.*}} "-ftime-trace=e/a.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" +// RUN: %clang -### -c -ftime-trace -ftime-trace-granularity=0 -fintegrated-as d/a.cpp -o e/a.o 2>&1 | FileCheck %s --check-prefix=COMPILE1 +// COMPILE1: -cc1{{.*}} "-ftime-trace=e/a.json" "-ftime-trace-granularity=0" -// RUN: %clang -### -c -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose d/a.cpp d/b.c -dumpdir f/ 2>&1 | FileCheck %s --check-prefix=COMPILE2 -// COMPILE2: -cc1{{.*}} "-ftime-trace=f/a.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" -// COMPILE2: -cc1{{.*}} "-ftime-trace=f/b.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" +// RUN: %clang -### -c -ftime-trace -ftime-trace-granularity=0 d/a.cpp d/b.c -dumpdir f/ 2>&1 | FileCheck %s --check-prefix=COMPILE2 +// COMPILE2: -cc1{{.*}} "-ftime-trace=f/a.json" "-ftime-trace-granularity=0" +// COMPILE2: -cc1{{.*}} "-ftime-trace=f/b.json" "-ftime-trace-granularity=0" /// -o specifies the link output. Create ${output}-${basename}.json. -// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose d/a.cpp d/b.c -o e/x 2>&1 | FileCheck %s --check-prefix=LINK1 -// LINK1: -cc1{{.*}} "-ftime-trace=e/x-a.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" -// LINK1: -cc1{{.*}} "-ftime-trace=e/x-b.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 d/a.cpp d/b.c -o e/x 2>&1 | FileCheck %s --check-prefix=LINK1 +// LINK1: -cc1{{.*}} "-ftime-trace=e/x-a.json" "-ftime-trace-granularity=0" +// LINK1: -cc1{{.*}} "-ftime-trace=e/x-b.json" "-ftime-trace-granularity=0" /// -dumpdir is f/g, not ending with a path separator. We create f/g${basename}.json. 
-// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose d/a.cpp d/b.c -o e/x -dumpdir f/g 2>&1 | FileCheck %s --check-prefix=LINK2 -// LINK2: -cc1{{.*}} "-ftime-trace=f/ga.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" -// LINK2: -cc1{{.*}} "-ftime-trace=f/gb.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 d/a.cpp d/b.c -o e/x -dumpdir f/g 2>&1 | FileCheck %s --check-prefix=LINK2 +// LINK2: -cc1{{.*}} "-ftime-trace=f/ga.json" "-ftime-trace-granularity=0" +// LINK2: -cc1{{.*}} "-ftime-trace=f/gb.json" "-ftime-trace-granularity=0" -// RUN: %clang -### -ftime-trace=e -ftime-trace-granularity=0 -ftime-trace-verbose d/a.cpp d/b.c -o f/x -dumpdir f/ 2>&1 | FileCheck %s --check-prefix=LINK3 -// LINK3: -cc1{{.*}} "-ftime-trace=e{{/|\\\\}}a-{{[^.]*}}.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" -// LINK3: -cc1{{.*}} "-ftime-trace=e{{/|\\\\}}b-{{[^.]*}}.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose" +// RUN: %clang -### -ftime-trace=e -ftime-trace-granularity=0 d/a.cpp d/b.c -o f/x -dumpdir f/ 2>&1 | FileCheck %s --check-prefix=LINK3 +// LINK3: -cc1{{.*}} "-ftime-trace=e{{/|\\\\}}a-{{[^.]*}}.json" "-ftime-trace-granularity=0" +// LINK3: -cc1{{.*}} "-ftime-trace=e{{/|\\\\}}b-{{[^.]*}}.json" "-ftime-trace-granularity=0" -// RUN: %clang -### -ftime-trace -ftime-trace=e -ftime-trace-granularity=1 -ftime-trace-verbose -xassembler d/a.cpp 2>&1 | \ +// RUN: %clang -### -ftime-trace -ftime-trace=e -ftime-trace-granularity=1 -xassembler d/a.cpp 2>&1 | \ // RUN: FileCheck %s --check-prefix=UNUSED // UNUSED: warning: argument unused during compilation: '-ftime-trace' // UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace=e' // UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace-granularity=1' -// UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace-verbose' // UNUSED-NOT: warning: template 
<typename T> diff --git a/clang/test/Driver/gpu-libc-headers.c b/clang/test/Driver/gpu-libc-headers.c index 32a5edb..53c0168 100644 --- a/clang/test/Driver/gpu-libc-headers.c +++ b/clang/test/Driver/gpu-libc-headers.c @@ -4,15 +4,15 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \ // RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \ // RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS -// CHECK-HEADERS: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}llvm_libc_wrappers"{{.*}}"-isysroot" "./" -// CHECK-HEADERS: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}llvm_libc_wrappers"{{.*}}"-isysroot" "./" +// CHECK-HEADERS: "-cc1"{{.*}}"-isysroot" "./"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}llvm_libc_wrappers" +// CHECK-HEADERS: "-cc1"{{.*}}"-isysroot" "./"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}llvm_libc_wrappers" // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a --sysroot=./ \ // RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-AMDGPU // RUN: %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 --sysroot=./ \ // RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-NVPTX -// CHECK-HEADERS-AMDGPU: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}amdgcn-amd-amdhsa"{{.*}}"-isysroot" "./" -// CHECK-HEADERS-NVPTX: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}nvptx64-nvidia-cuda"{{.*}}"-isysroot" "./" +// CHECK-HEADERS-AMDGPU: "-cc1"{{.*}}"-isysroot" "./"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}amdgcn-amd-amdhsa" +// CHECK-HEADERS-NVPTX: "-cc1"{{.*}}"-isysroot" "./"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}nvptx64-nvidia-cuda" // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \ // RUN: -nogpuinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED diff --git a/clang/test/SemaCXX/modules.cppm b/clang/test/SemaCXX/modules.cppm index 41204be..267417b 100644 --- 
a/clang/test/SemaCXX/modules.cppm +++ b/clang/test/SemaCXX/modules.cppm @@ -1,19 +1,17 @@ -// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -o %t.0.pcm -verify -DTEST=0 -// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -o %t.1.pcm -verify -DTEST=1 -// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -fmodule-file=foo=%t.0.pcm -o %t.2.pcm -verify -DTEST=2 -// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify -Dfoo=bar -DTEST=3 +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t -#if TEST == 0 || TEST == 2 -// expected-no-diagnostics -#endif +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/A.cppm -o %t.0.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/B.cppm -o %t.1.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/C.cppm -fmodule-file=foo=%t.0.pcm -o %t.2.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/D.cppm -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/E.cppm -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify -Dfoo=bar +//--- A.cppm export module foo; - static int m; - int n; - -#if TEST == 0 export { int a; int b; @@ -27,7 +25,43 @@ export void f() {} export struct T { } t; -#elif TEST == 3 +// expected-no-diagnostics + +//--- B.cppm +export module foo; +static int m; +int n; +struct S { + export int n; // expected-error {{expected member name or ';'}} + export static int n; // expected-error {{expected member name or ';'}} +}; + +// FIXME: Exports of declarations without external linkage are disallowed. +// Exports of declarations with non-external-linkage types are disallowed. + +// Cannot export within another export. This isn't precisely covered by the +// language rules right now, but (per personal correspondence between zygoloid +// and gdr) is the intent. 
+export { // expected-note {{export block begins here}} + extern "C++" { + namespace NestedExport { + export { // expected-error {{export declaration appears within another export declaration}} + int q; + } + } // namespace NestedExport + } +} + +//--- C.cppm +export module foo; +static int m; +int n; +// expected-no-diagnostics + +//--- D.cppm +export module foo; +static int m; +int n; int use_a = a; // expected-error {{use of undeclared identifier 'a'}} #undef foo @@ -46,29 +80,12 @@ int use_n = n; // FIXME: this should not be visible, because it is not exported extern int n; static_assert(&n != p); // expected-error{{use of undeclared identifier 'p'}} -#endif -#if TEST == 1 -struct S { - export int n; // expected-error {{expected member name or ';'}} - export static int n; // expected-error {{expected member name or ';'}} -}; -#endif - -// FIXME: Exports of declarations without external linkage are disallowed. -// Exports of declarations with non-external-linkage types are disallowed. +//--- E.cppm +export module foo; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'foo'}} +static int m; +int n; +int use_a = a; // expected-error {{use of undeclared identifier 'a'}} -// Cannot export within another export. This isn't precisely covered by the -// language rules right now, but (per personal correspondence between zygoloid -// and gdr) is the intent. 
-#if TEST == 1 -export { // expected-note {{export block begins here}} - extern "C++" { - namespace NestedExport { - export { // expected-error {{export declaration appears within another export declaration}} - int q; - } - } // namespace NestedExport - } -} -#endif +#undef foo +import foo; // expected-error {{imports must immediately follow the module declaration}} diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp index f5e5fad..c2ccb47 100644 --- a/clang/tools/driver/cc1_main.cpp +++ b/clang/tools/driver/cc1_main.cpp @@ -241,8 +241,7 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) { if (!Clang->getFrontendOpts().TimeTracePath.empty()) { llvm::timeTraceProfilerInitialize( - Clang->getFrontendOpts().TimeTraceGranularity, Argv0, - Clang->getFrontendOpts().TimeTraceVerbose); + Clang->getFrontendOpts().TimeTraceGranularity, Argv0); } // --print-supported-cpus takes priority over the actual compilation. if (Clang->getFrontendOpts().PrintSupportedCPUs) diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp index ec93f09..15d1e0c 100644 --- a/clang/tools/driver/cc1as_main.cpp +++ b/clang/tools/driver/cc1as_main.cpp @@ -531,6 +531,9 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts, MCOptions.MCNoWarn = Opts.NoWarn; MCOptions.MCFatalWarnings = Opts.FatalWarnings; MCOptions.MCNoTypeCheck = Opts.NoTypeCheck; + MCOptions.ShowMCInst = Opts.ShowInst; + MCOptions.AsmVerbose = true; + MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory; MCOptions.ABIName = Opts.TargetABI; // FIXME: There is a bit of code duplication with addPassesToEmitFile. 
@@ -571,9 +574,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts, Triple T(Opts.Triple); Str.reset(TheTarget->createMCObjectStreamer( - T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI, - Opts.RelaxAll, Opts.IncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); + T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI)); Str.get()->initSections(Opts.NoExecStack, *STI); } diff --git a/clang/unittests/AST/Interp/toAPValue.cpp b/clang/unittests/AST/Interp/toAPValue.cpp index d6879d6..5ec607a 100644 --- a/clang/unittests/AST/Interp/toAPValue.cpp +++ b/clang/unittests/AST/Interp/toAPValue.cpp @@ -27,6 +27,7 @@ TEST(ToAPValue, Pointers) { auto AST = tooling::buildASTFromCodeWithArgs( Code, {"-fexperimental-new-constant-interpreter"}); + auto &ASTCtx = AST->getASTContext(); auto &Ctx = AST->getASTContext().getInterpContext(); Program &Prog = Ctx.getProgram(); @@ -47,7 +48,7 @@ TEST(ToAPValue, Pointers) { const Pointer &GP = getGlobalPtr("b"); const Pointer &P = GP.deref<Pointer>(); ASSERT_TRUE(P.isLive()); - APValue A = P.toAPValue(); + APValue A = P.toAPValue(ASTCtx); ASSERT_TRUE(A.isLValue()); ASSERT_TRUE(A.hasLValuePath()); const auto &Path = A.getLValuePath(); @@ -62,7 +63,7 @@ TEST(ToAPValue, Pointers) { const Pointer &GP = getGlobalPtr("p"); const Pointer &P = GP.deref<Pointer>(); ASSERT_TRUE(P.isIntegralPointer()); - APValue A = P.toAPValue(); + APValue A = P.toAPValue(ASTCtx); ASSERT_TRUE(A.isLValue()); ASSERT_TRUE(A.getLValueBase().isNull()); APSInt I; @@ -77,7 +78,7 @@ TEST(ToAPValue, Pointers) { const Pointer &GP = getGlobalPtr("nullp"); const Pointer &P = GP.deref<Pointer>(); ASSERT_TRUE(P.isIntegralPointer()); - APValue A = P.toAPValue(); + APValue A = P.toAPValue(ASTCtx); ASSERT_TRUE(A.isLValue()); ASSERT_TRUE(A.getLValueBase().isNull()); ASSERT_TRUE(A.isNullPointer()); @@ -96,6 +97,7 @@ TEST(ToAPValue, FunctionPointers) { auto AST = tooling::buildASTFromCodeWithArgs( Code, {"-fexperimental-new-constant-interpreter"}); + 
auto &ASTCtx = AST->getASTContext(); auto &Ctx = AST->getASTContext().getInterpContext(); Program &Prog = Ctx.getProgram(); @@ -117,7 +119,7 @@ TEST(ToAPValue, FunctionPointers) { const Pointer &GP = getGlobalPtr("func"); const FunctionPointer &FP = GP.deref<FunctionPointer>(); ASSERT_FALSE(FP.isZero()); - APValue A = FP.toAPValue(); + APValue A = FP.toAPValue(ASTCtx); ASSERT_TRUE(A.hasValue()); ASSERT_TRUE(A.isLValue()); ASSERT_TRUE(A.hasLValuePath()); @@ -132,7 +134,7 @@ TEST(ToAPValue, FunctionPointers) { ASSERT_NE(D, nullptr); const Pointer &GP = getGlobalPtr("nullp"); const auto &P = GP.deref<FunctionPointer>(); - APValue A = P.toAPValue(); + APValue A = P.toAPValue(ASTCtx); ASSERT_TRUE(A.isLValue()); ASSERT_TRUE(A.getLValueBase().isNull()); ASSERT_TRUE(A.isNullPointer()); @@ -151,6 +153,7 @@ TEST(ToAPValue, FunctionPointersC) { auto AST = tooling::buildASTFromCodeWithArgs( Code, {"-x", "c", "-fexperimental-new-constant-interpreter"}); + auto &ASTCtx = AST->getASTContext(); auto &Ctx = AST->getASTContext().getInterpContext(); Program &Prog = Ctx.getProgram(); @@ -174,7 +177,7 @@ TEST(ToAPValue, FunctionPointersC) { ASSERT_TRUE(GP.isLive()); const FunctionPointer &FP = GP.deref<FunctionPointer>(); ASSERT_FALSE(FP.isZero()); - APValue A = FP.toAPValue(); + APValue A = FP.toAPValue(ASTCtx); ASSERT_TRUE(A.hasValue()); ASSERT_TRUE(A.isLValue()); const auto &Path = A.getLValuePath(); @@ -197,6 +200,7 @@ TEST(ToAPValue, MemberPointers) { auto AST = tooling::buildASTFromCodeWithArgs( Code, {"-fexperimental-new-constant-interpreter"}); + auto &ASTCtx = AST->getASTContext(); auto &Ctx = AST->getASTContext().getInterpContext(); Program &Prog = Ctx.getProgram(); @@ -218,7 +222,7 @@ TEST(ToAPValue, MemberPointers) { const Pointer &GP = getGlobalPtr("pm"); ASSERT_TRUE(GP.isLive()); const MemberPointer &FP = GP.deref<MemberPointer>(); - APValue A = FP.toAPValue(); + APValue A = FP.toAPValue(ASTCtx); ASSERT_EQ(A.getMemberPointerDecl(), getDecl("m")); ASSERT_EQ(A.getKind(), 
APValue::MemberPointer); } @@ -228,7 +232,7 @@ TEST(ToAPValue, MemberPointers) { ASSERT_TRUE(GP.isLive()); const MemberPointer &NP = GP.deref<MemberPointer>(); ASSERT_TRUE(NP.isZero()); - APValue A = NP.toAPValue(); + APValue A = NP.toAPValue(ASTCtx); ASSERT_EQ(A.getKind(), APValue::MemberPointer); } } diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index c5e8aa7..f70424c 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -75,6 +75,26 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) { EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen); EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference); + Tokens = annotate("#define FOO bar(a * b)"); + ASSERT_EQ(Tokens.size(), 10u) << Tokens; + EXPECT_TOKEN(Tokens[6], tok::star, TT_BinaryOperator); + + Tokens = annotate("#define FOO foo.bar(a & b)"); + ASSERT_EQ(Tokens.size(), 12u) << Tokens; + EXPECT_TOKEN(Tokens[8], tok::amp, TT_BinaryOperator); + + Tokens = annotate("#define FOO foo::bar(a && b)"); + ASSERT_EQ(Tokens.size(), 12u) << Tokens; + EXPECT_TOKEN(Tokens[8], tok::ampamp, TT_BinaryOperator); + + Tokens = annotate("#define FOO foo bar(a *b)"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::star, TT_PointerOrReference); + + Tokens = annotate("#define FOO void foo::bar(a &b)"); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + EXPECT_TOKEN(Tokens[9], tok::amp, TT_PointerOrReference); + Tokens = annotate("void f() {\n" " while (p < a && *p == 'a')\n" " p++;\n" diff --git a/clang/unittests/Support/TimeProfilerTest.cpp b/clang/unittests/Support/TimeProfilerTest.cpp index 56d880c..5f3950ff 100644 --- a/clang/unittests/Support/TimeProfilerTest.cpp +++ b/clang/unittests/Support/TimeProfilerTest.cpp @@ -10,15 +10,11 @@ #include "clang/Frontend/FrontendActions.h" #include "clang/Lex/PreprocessorOptions.h" -#include "llvm/ADT/StringMap.h" #include 
"llvm/Support/JSON.h" -#include "llvm/Support/Path.h" #include "llvm/Support/TimeProfiler.h" -#include "llvm/Support/VirtualFileSystem.h" #include <stack> #include "gtest/gtest.h" -#include <tuple> using namespace clang; using namespace llvm; @@ -27,8 +23,7 @@ namespace { // Should be called before testing. void setupProfiler() { - timeTraceProfilerInitialize(/*TimeTraceGranularity=*/0, "test", - /*TimeTraceVerbose=*/true); + timeTraceProfilerInitialize(/*TimeTraceGranularity=*/0, "test"); } // Should be called after `compileFromString()`. @@ -43,24 +38,14 @@ std::string teardownProfiler() { // Returns true if code compiles successfully. // We only parse AST here. This is enough for constexpr evaluation. -bool compileFromString(StringRef Code, StringRef Standard, StringRef File, - llvm::StringMap<std::string> Headers = {}) { +bool compileFromString(StringRef Code, StringRef Standard, StringRef FileName) { CompilerInstance Compiler; Compiler.createDiagnostics(); - llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS( - new llvm::vfs::InMemoryFileSystem()); - FS->addFile(File, 0, MemoryBuffer::getMemBuffer(Code)); - for (const auto &Header : Headers) { - FS->addFile(Header.getKey(), 0, - MemoryBuffer::getMemBuffer(Header.getValue())); - } - llvm::IntrusiveRefCntPtr<FileManager> Files( - new FileManager(FileSystemOptions(), FS)); - Compiler.setFileManager(Files.get()); - auto Invocation = std::make_shared<CompilerInvocation>(); - std::vector<const char *> Args = {Standard.data(), File.data()}; + Invocation->getPreprocessorOpts().addRemappedFile( + FileName, MemoryBuffer::getMemBuffer(Code).release()); + const char *Args[] = {Standard.data(), FileName.data()}; CompilerInvocation::CreateFromArgs(*Invocation, Args, Compiler.getDiagnostics()); Compiler.setInvocation(std::move(Invocation)); @@ -75,28 +60,13 @@ bool compileFromString(StringRef Code, StringRef Standard, StringRef File, return Compiler.ExecuteAction(Action); } -std::string GetMetadata(json::Object 
*Event) { - std::string Metadata; - llvm::raw_string_ostream OS(Metadata); - if (json::Object *Args = Event->getObject("args")) { - if (auto Detail = Args->getString("detail")) - OS << Detail; - // Use only filename to not include os-specific path separators. - if (auto File = Args->getString("file")) - OS << ", " << llvm::sys::path::filename(*File); - if (auto Line = Args->getInteger("line")) - OS << ":" << *Line; - } - return Metadata; -} - // Returns pretty-printed trace graph. std::string buildTraceGraph(StringRef Json) { struct EventRecord { int64_t TimestampBegin; int64_t TimestampEnd; - std::string Name; - std::string Metadata; + StringRef Name; + StringRef Detail; }; std::vector<EventRecord> Events; @@ -111,13 +81,10 @@ std::string buildTraceGraph(StringRef Json) { int64_t TimestampBegin = TraceEventObj->getInteger("ts").value_or(0); int64_t TimestampEnd = TimestampBegin + TraceEventObj->getInteger("dur").value_or(0); - std::string Name = TraceEventObj->getString("name").value_or("").str(); - std::string Metadata = GetMetadata(TraceEventObj); - - // Source events are asynchronous events and may not perfectly nest the - // synchronous events. Skip testing them. 
- if (Name == "Source") - continue; + StringRef Name = TraceEventObj->getString("name").value_or(""); + StringRef Detail = ""; + if (json::Object *Args = TraceEventObj->getObject("args")) + Detail = Args->getString("detail").value_or(""); // This is a "summary" event, like "Total PerformPendingInstantiations", // skip it @@ -125,7 +92,7 @@ std::string buildTraceGraph(StringRef Json) { continue; Events.emplace_back( - EventRecord{TimestampBegin, TimestampEnd, Name, Metadata}); + EventRecord{TimestampBegin, TimestampEnd, Name, Detail}); } // There can be nested events that are very fast, for example: @@ -165,9 +132,9 @@ std::string buildTraceGraph(StringRef Json) { Stream << "| "; } Stream.write(Event.Name.data(), Event.Name.size()); - if (!Event.Metadata.empty()) { + if (!Event.Detail.empty()) { Stream << " ("; - Stream.write(Event.Metadata.data(), Event.Metadata.size()); + Stream.write(Event.Detail.data(), Event.Detail.size()); Stream << ")"; } Stream << "\n"; @@ -178,7 +145,7 @@ std::string buildTraceGraph(StringRef Json) { } // namespace TEST(TimeProfilerTest, ConstantEvaluationCxx20) { - std::string Code = R"( + constexpr StringRef Code = R"( void print(double value); namespace slow_namespace { @@ -208,7 +175,8 @@ constexpr int slow_init_list[] = {1, 1, 2, 3, 5, 8, 13, 21}; // 25th line setupProfiler(); ASSERT_TRUE(compileFromString(Code, "-std=c++20", "test.cc")); std::string Json = teardownProfiler(); - ASSERT_EQ(R"( + std::string TraceGraph = buildTraceGraph(Json); + ASSERT_TRUE(TraceGraph == R"( Frontend | ParseDeclarationOrFunctionDefinition (test.cc:2:1) | ParseDeclarationOrFunctionDefinition (test.cc:6:1) @@ -234,54 +202,14 @@ Frontend | ParseDeclarationOrFunctionDefinition (test.cc:25:1) | | EvaluateAsInitializer (slow_init_list) | PerformPendingInstantiations -)", - buildTraceGraph(Json)); -} - -TEST(TimeProfilerTest, TemplateInstantiations) { - std::string B_H = R"( - template <typename T> - T fooB(T t) { - return T(); - } +)"); - #define MacroTemp(x) 
template <typename T> void foo##x(T) { T(); } - )"; - - std::string A_H = R"( - #include "b.h" - - MacroTemp(MTA) - - template <typename T> - void fooA(T t) { fooB(t); fooMTA(t); } - )"; - std::string Code = R"( - #include "a.h" - void user() { fooA(0); } - )"; - - setupProfiler(); - ASSERT_TRUE(compileFromString(Code, "-std=c++20", "test.cc", - /*Headers=*/{{"a.h", A_H}, {"b.h", B_H}})); - std::string Json = teardownProfiler(); - ASSERT_EQ(R"( -Frontend -| ParseFunctionDefinition (fooB) -| ParseFunctionDefinition (fooMTA) -| ParseFunctionDefinition (fooA) -| ParseDeclarationOrFunctionDefinition (test.cc:3:5) -| | ParseFunctionDefinition (user) -| PerformPendingInstantiations -| | InstantiateFunction (fooA<int>, a.h:7) -| | | InstantiateFunction (fooB<int>, b.h:3) -| | | InstantiateFunction (fooMTA<int>, a.h:4) -)", - buildTraceGraph(Json)); + // NOTE: If this test is failing, run this test with + // `llvm::errs() << TraceGraph;` and change the assert above. } TEST(TimeProfilerTest, ConstantEvaluationC99) { - std::string Code = R"( + constexpr StringRef Code = R"( struct { short quantval[4]; // 3rd line } value; @@ -290,12 +218,15 @@ struct { setupProfiler(); ASSERT_TRUE(compileFromString(Code, "-std=c99", "test.c")); std::string Json = teardownProfiler(); - ASSERT_EQ(R"( + std::string TraceGraph = buildTraceGraph(Json); + ASSERT_TRUE(TraceGraph == R"( Frontend | ParseDeclarationOrFunctionDefinition (test.c:2:1) | | isIntegerConstantExpr (<test.c:3:18>) | | EvaluateKnownConstIntCheckOverflow (<test.c:3:18>) | PerformPendingInstantiations -)", - buildTraceGraph(Json)); +)"); + + // NOTE: If this test is failing, run this test with + // `llvm::errs() << TraceGraph;` and change the assert above. 
} diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index a6ded8b..1f69a4e 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -182,7 +182,7 @@ C++23, informally referred to as C++26.</p> <tr> <td>Module Declarations Shouldn’t be Macros</td> <td><a href="https://wg21.link/P3034R1">P3034R1</a> (<a href="#dr">DR</a>)</td> - <td class="none" align="center">No</td> + <td class="unreleased" align="center">Clang 19</td> </tr> <tr> <td>Trivial infinite loops are not Undefined Behavior</td> diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index f8f86a7..74af2e6 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -747,7 +747,7 @@ INTERCEPTOR(int, atexit, void (*func)()) { extern "C" { extern int _pthread_atfork(void (*prepare)(), void (*parent)(), void (*child)()); -}; +} INTERCEPTOR(int, pthread_atfork, void (*prepare)(), void (*parent)(), void (*child)()) { diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index ab2b685..867ed97 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -141,7 +141,7 @@ enum ProcessorFeatures { FEATURE_AVX512VP2INTERSECT, // FIXME: Below Features has some missings comparing to gcc, it's because gcc // has some not one-to-one mapped in llvm. 
- FEATURE_3DNOW, + // FEATURE_3DNOW, // FEATURE_3DNOWP, FEATURE_ADX = 40, // FEATURE_ABM, diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp b/compiler-rt/lib/lsan/lsan_interceptors.cpp index 6df4b68..b569c337 100644 --- a/compiler-rt/lib/lsan/lsan_interceptors.cpp +++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp @@ -389,7 +389,7 @@ INTERCEPTOR(int, atexit, void (*f)()) { extern "C" { extern int _pthread_atfork(void (*prepare)(), void (*parent)(), void (*child)()); -}; +} INTERCEPTOR(int, pthread_atfork, void (*prepare)(), void (*parent)(), void (*child)()) { diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index 774c4ea..6196345 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -84,7 +84,13 @@ if is_msvc: # use_clang() and use_lld() respectively, so set them to "", if needed. if not hasattr(config, "clang_src_dir"): config.clang_src_dir = "" -llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects)) +# Facebook T92898286 +should_test_bolt = get_required_attr(config, "llvm_test_bolt") +if should_test_bolt: + llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"]) +else: + llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects)) +# End Facebook T92898286 if not hasattr(config, "lld_src_dir"): config.lld_src_dir = "" @@ -293,3 +299,9 @@ llvm_config.feature_config([("--build-mode", {"Debug|RelWithDebInfo": "debug-inf # Allow 'REQUIRES: XXX-registered-target' in tests. for arch in config.targets_to_build: config.available_features.add(arch.lower() + "-registered-target") + +# Facebook T92898286 +# Ensure the user's PYTHONPATH is included. 
+if "PYTHONPATH" in os.environ: + config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"] +# End Facebook T92898286 diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in index 39458df..2d53cd3 100644 --- a/cross-project-tests/lit.site.cfg.py.in +++ b/cross-project-tests/lit.site.cfg.py.in @@ -21,6 +21,10 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/libc/config/config.json b/libc/config/config.json index 94bfed8..2005f42 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -77,6 +77,16 @@ "doc": "Default size for the constinit freelist buffer used for the freelist malloc implementation (default 1o 1GB)." } }, + "unistd": { + "LIBC_CONF_ENABLE_TID_CACHE": { + "value": true, + "doc": "Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications." + }, + "LIBC_CONF_ENABLE_PID_CACHE": { + "value": true, + "doc": "Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications." 
+ } + }, "math": { "LIBC_CONF_MATH_OPTIMIZATIONS": { "value": 0, diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index e2f6bd7..8afd3fb 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -297,6 +297,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.geteuid libc.src.unistd.getpid libc.src.unistd.getppid + libc.src.unistd.gettid libc.src.unistd.getuid libc.src.unistd.isatty libc.src.unistd.link diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 33dd8d0..54a382e 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -17,6 +17,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.ctype.tolower libc.src.ctype.toupper + # dlfcn.h entrypoints + libc.src.dlfcn.dlclose + libc.src.dlfcn.dlerror + libc.src.dlfcn.dlopen + libc.src.dlfcn.dlsym + # errno.h entrypoints libc.src.errno.errno @@ -52,6 +58,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.mempcpy libc.src.string.memrchr libc.src.string.memset + libc.src.string.memset_explicit libc.src.string.rindex libc.src.string.stpcpy libc.src.string.stpncpy @@ -180,6 +187,9 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdlib.qsort_r libc.src.stdlib.rand libc.src.stdlib.srand + libc.src.stdlib.strfromd + libc.src.stdlib.strfromf + libc.src.stdlib.strfroml libc.src.stdlib.strtod libc.src.stdlib.strtof libc.src.stdlib.strtol @@ -197,6 +207,7 @@ set(TARGET_LIBC_ENTRYPOINTS # stdio.h entrypoints libc.src.stdio.fdopen + libc.src.stdio.fileno libc.src.stdio.fprintf libc.src.stdio.fscanf libc.src.stdio.printf @@ -211,6 +222,14 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.vsnprintf libc.src.stdio.vsprintf + # sys/epoll.h entrypoints + libc.src.sys.epoll.epoll_create + libc.src.sys.epoll.epoll_create1 + libc.src.sys.epoll.epoll_ctl + libc.src.sys.epoll.epoll_pwait + libc.src.sys.epoll.epoll_wait + libc.src.sys.epoll.epoll_pwait2 + # sys/mman.h entrypoints 
libc.src.sys.mman.madvise libc.src.sys.mman.mincore @@ -261,12 +280,6 @@ set(TARGET_LIBC_ENTRYPOINTS # sys/auxv.h entrypoints libc.src.sys.auxv.getauxval - # sys/epoll.h entrypoints - # Disabled due to epoll_wait syscalls not being available on this platform. - # libc.src.sys.epoll.epoll_wait - # libc.src.sys.epoll.epoll_pwait - # libc.src.sys.epoll.epoll_pwait2 - # termios.h entrypoints libc.src.termios.cfgetispeed libc.src.termios.cfgetospeed @@ -296,12 +309,14 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.geteuid libc.src.unistd.getpid libc.src.unistd.getppid + libc.src.unistd.gettid libc.src.unistd.getuid libc.src.unistd.isatty libc.src.unistd.link libc.src.unistd.linkat libc.src.unistd.lseek libc.src.unistd.pathconf + libc.src.unistd.pipe libc.src.unistd.pread libc.src.unistd.pwrite libc.src.unistd.read @@ -347,6 +362,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.canonicalize + libc.src.math.canonicalizef + libc.src.math.canonicalizel libc.src.math.cbrt libc.src.math.cbrtf libc.src.math.ceil @@ -365,6 +383,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.exp10f libc.src.math.exp2 libc.src.math.exp2f + libc.src.math.exp2m1f libc.src.math.expf libc.src.math.expm1 libc.src.math.expm1f @@ -492,6 +511,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.rintf libc.src.math.rintl libc.src.math.round + libc.src.math.roundeven + libc.src.math.roundevenf + libc.src.math.roundevenl libc.src.math.roundf libc.src.math.roundl libc.src.math.scalbn @@ -523,8 +545,10 @@ set(TARGET_LIBM_ENTRYPOINTS if(LIBC_TYPES_HAS_FLOAT128) list(APPEND TARGET_LIBM_ENTRYPOINTS # math.h C23 _Float128 entrypoints + libc.src.math.canonicalizef128 libc.src.math.ceilf128 libc.src.math.copysignf128 + libc.src.math.dmulf128 libc.src.math.fabsf128 libc.src.math.fdimf128 libc.src.math.floorf128 @@ -539,6 +563,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.fminimum_numf128 libc.src.math.fminimumf128 libc.src.math.fmodf128 + 
libc.src.math.fmulf128 libc.src.math.frexpf128 libc.src.math.fromfpf128 libc.src.math.fromfpxf128 @@ -556,7 +581,9 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.nextafterf128 libc.src.math.nextdownf128 libc.src.math.nextupf128 + libc.src.math.remquof128 libc.src.math.rintf128 + libc.src.math.roundevenf128 libc.src.math.roundf128 libc.src.math.scalbnf128 libc.src.math.sqrtf128 @@ -566,14 +593,47 @@ if(LIBC_TYPES_HAS_FLOAT128) ) endif() +if(LIBC_COMPILER_HAS_FIXED_POINT) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # stdfix.h _Fract and _Accum entrypoints + libc.src.stdfix.abshk + libc.src.stdfix.abshr + libc.src.stdfix.absk + libc.src.stdfix.abslk + libc.src.stdfix.abslr + libc.src.stdfix.absr + libc.src.stdfix.exphk + libc.src.stdfix.expk + libc.src.stdfix.roundhk + libc.src.stdfix.roundhr + libc.src.stdfix.roundk + libc.src.stdfix.roundlk + libc.src.stdfix.roundlr + libc.src.stdfix.roundr + libc.src.stdfix.rounduhk + libc.src.stdfix.rounduhr + libc.src.stdfix.rounduk + libc.src.stdfix.roundulk + libc.src.stdfix.roundulr + libc.src.stdfix.roundur + libc.src.stdfix.sqrtuhk + libc.src.stdfix.sqrtuhr + libc.src.stdfix.sqrtuk + libc.src.stdfix.sqrtur + libc.src.stdfix.sqrtulr + libc.src.stdfix.uhksqrtus + libc.src.stdfix.uksqrtui + ) +endif() + if(LLVM_LIBC_FULL_BUILD) list(APPEND TARGET_LIBC_ENTRYPOINTS - # compiler entrypoints (no corresponding header) - libc.src.compiler.__stack_chk_fail - # assert.h entrypoints libc.src.assert.__assert_fail + # compiler entrypoints (no corresponding header) + libc.src.compiler.__stack_chk_fail + # dirent.h entrypoints libc.src.dirent.closedir libc.src.dirent.dirfd @@ -598,6 +658,12 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.pthread.pthread_attr_setguardsize libc.src.pthread.pthread_attr_setstack libc.src.pthread.pthread_attr_setstacksize + libc.src.pthread.pthread_condattr_destroy + libc.src.pthread.pthread_condattr_getclock + libc.src.pthread.pthread_condattr_getpshared + libc.src.pthread.pthread_condattr_init + 
libc.src.pthread.pthread_condattr_setclock + libc.src.pthread.pthread_condattr_setpshared libc.src.pthread.pthread_create libc.src.pthread.pthread_detach libc.src.pthread.pthread_equal @@ -620,6 +686,21 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.pthread.pthread_mutexattr_setrobust libc.src.pthread.pthread_mutexattr_settype libc.src.pthread.pthread_once + libc.src.pthread.pthread_rwlock_destroy + libc.src.pthread.pthread_rwlock_init + libc.src.pthread.pthread_rwlock_rdlock + libc.src.pthread.pthread_rwlock_timedrdlock + libc.src.pthread.pthread_rwlock_timedwrlock + libc.src.pthread.pthread_rwlock_tryrdlock + libc.src.pthread.pthread_rwlock_trywrlock + libc.src.pthread.pthread_rwlock_unlock + libc.src.pthread.pthread_rwlock_wrlock + libc.src.pthread.pthread_rwlockattr_destroy + libc.src.pthread.pthread_rwlockattr_getkind_np + libc.src.pthread.pthread_rwlockattr_getpshared + libc.src.pthread.pthread_rwlockattr_init + libc.src.pthread.pthread_rwlockattr_setkind_np + libc.src.pthread.pthread_rwlockattr_setpshared libc.src.pthread.pthread_self libc.src.pthread.pthread_setname_np libc.src.pthread.pthread_setspecific @@ -643,7 +724,6 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.fgetc libc.src.stdio.fgetc_unlocked libc.src.stdio.fgets - libc.src.stdio.fileno libc.src.stdio.flockfile libc.src.stdio.fopen libc.src.stdio.fopencookie @@ -652,7 +732,9 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.fread libc.src.stdio.fread_unlocked libc.src.stdio.fseek + libc.src.stdio.fseeko libc.src.stdio.ftell + libc.src.stdio.ftello libc.src.stdio.funlockfile libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked @@ -673,9 +755,11 @@ if(LLVM_LIBC_FULL_BUILD) # stdlib.h entrypoints libc.src.stdlib._Exit libc.src.stdlib.abort + libc.src.stdlib.at_quick_exit libc.src.stdlib.atexit libc.src.stdlib.exit libc.src.stdlib.getenv + libc.src.stdlib.quick_exit # signal.h entrypoints libc.src.signal.kill @@ -757,6 +841,9 @@ if(LLVM_LIBC_FULL_BUILD) # sys/select.h entrypoints libc.src.sys.select.select + + # 
sys/socket.h entrypoints + libc.src.sys.socket.socket ) endif() diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 7309e95..d4e246e 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -228,9 +228,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.sys.epoll.epoll_ctl libc.src.sys.epoll.epoll_pwait libc.src.sys.epoll.epoll_wait - # TODO: Need to check if pwait2 is available before providing. - # https://github.com/llvm/llvm-project/issues/80060 - # libc.src.sys.epoll.epoll_pwait2 + libc.src.sys.epoll.epoll_pwait2 # sys/mman.h entrypoints libc.src.sys.mman.madvise @@ -315,6 +313,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.geteuid libc.src.unistd.getpid libc.src.unistd.getppid + libc.src.unistd.gettid libc.src.unistd.getuid libc.src.unistd.isatty libc.src.unistd.link diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index dfb35f6..5c55e4a 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -52,3 +52,6 @@ to learn about the defaults for your platform and target. * **"string" options** - ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled. - ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen. +* **"unistd" options** + - ``LIBC_CONF_ENABLE_PID_CACHE``: Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications. + - ``LIBC_CONF_ENABLE_TID_CACHE``: Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications. 
diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst index 3faae31..b712780 100644 --- a/libc/docs/dev/undefined_behavior.rst +++ b/libc/docs/dev/undefined_behavior.rst @@ -93,3 +93,26 @@ direction in this case. Non-const Constant Return Values -------------------------------- Some libc functions, like ``dlerror()``, return ``char *`` instead of ``const char *`` and then tell the caller they promise not to to modify this value. Any modification of this value is undefined behavior. + +Cached ``getpid/gettid`` +------------------------ +Since version ``2.25``, glibc removes its cache mechanism for ``getpid/gettid`` +(See the history section in https://man7.org/linux/man-pages/man2/getpid.2.html). +LLVM's libc still implements the cache as it is useful for fast deadlock detection. +The cache mechanism is also implemented in MUSL and bionic. The tid/pid cache can +be disabled by setting ``LIBC_CONF_ENABLE_TID_CACHE`` and ``LIBC_CONF_ENABLE_PID_CACHE`` +to ``false`` respectively. + +Unwrapped ``SYS_clone/SYS_fork/SYS_vfork`` +------------------------------------------ +It is highly discouraged to use unwrapped ``SYS_clone/SYS_fork/SYS_vfork``. +First, calling such syscalls without provided libc wrappers ignores +all the ``pthread_atfork`` entries as libc can no longer detect the ``fork``. +Second, libc relies on the ``fork/clone`` wrappers to correctly maintain cache for +process id and thread id, and other important process-specific states such as the list +of robust mutexes. Third, even if the user is to call ``exec*`` functions immediately, +there can still be other unexpected issues. For instance, there can be signal handlers +inherited from parent process triggered inside the instruction window between ``fork`` +and ``exec*``. 
As libc failed to maintain its internal states correctly, even though the +functions used inside the signal handlers are marked as ``async-signal-safe`` (such as +``getpid``), they will still return wrong values or lead to other even worse situations. diff --git a/libc/include/assert.h.def b/libc/include/assert.h.def index 9c924c7..d5ae14a 100644 --- a/libc/include/assert.h.def +++ b/libc/include/assert.h.def @@ -12,22 +12,19 @@ // This file may be usefully included multiple times to change assert()'s // definition based on NDEBUG. - -#undef assert -#ifdef NDEBUG -#define assert(e) (void)0 -#else - #ifndef __cplusplus #undef static_assert #define static_assert _Static_assert #endif +#undef assert +#ifdef NDEBUG +#define assert(e) (void)0 +#else #ifdef __cplusplus extern "C" #endif _Noreturn void __assert_fail(const char *, const char *, unsigned, const char *) __NOEXCEPT; - #define assert(e) \ ((e) ? (void)0 : __assert_fail(#e, __FILE__, __LINE__, __PRETTY_FUNCTION__)) #endif diff --git a/libc/spec/posix.td b/libc/spec/posix.td index 1878b1e..48f743d 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -547,6 +547,11 @@ def POSIX : StandardSpec<"POSIX"> { [ArgSpec<VoidType>] >, FunctionSpec< + "gettid", + RetValSpec<PidT>, + [ArgSpec<VoidType>] + >, + FunctionSpec< "getuid", RetValSpec<UidT>, [ArgSpec<VoidType>] @@ -602,16 +607,6 @@ def POSIX : StandardSpec<"POSIX"> { [ArgSpec<ConstCharPtr>] >, FunctionSpec< - "getpid", - RetValSpec<IntType>, - [ArgSpec<VoidType>] - >, - FunctionSpec< - "getppid", - RetValSpec<IntType>, - [ArgSpec<VoidType>] - >, - FunctionSpec< "link", RetValSpec<IntType>, [ArgSpec<ConstCharPtr>, ArgSpec<ConstCharPtr>] diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index 1b545c5..51811a2 100644 --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -282,7 +282,7 @@ int File::ungetc_unlocked(int c) { return c; } -ErrorOr<int> File::seek(long offset, int whence) { +ErrorOr<int> 
File::seek(off_t offset, int whence) { FileLock lock(this); if (prev_op == FileOp::WRITE && pos > 0) { diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index 0cedf86..42e1d11 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -183,7 +183,7 @@ public: return read_unlocked(data, len); } - ErrorOr<int> seek(long offset, int whence); + ErrorOr<int> seek(off_t offset, int whence); ErrorOr<off_t> tell(); diff --git a/libc/src/__support/HashTable/randomness.h b/libc/src/__support/HashTable/randomness.h index 06d3e84..244dd41 100644 --- a/libc/src/__support/HashTable/randomness.h +++ b/libc/src/__support/HashTable/randomness.h @@ -36,7 +36,7 @@ LIBC_INLINE uint64_t next_random_seed() { entropy[1] = reinterpret_cast<uint64_t>(&state); #if defined(LIBC_HASHTABLE_USE_GETRANDOM) int errno_backup = libc_errno; - ssize_t count = sizeof(entropy); + size_t count = sizeof(entropy); uint8_t *buffer = reinterpret_cast<uint8_t *>(entropy); while (count > 0) { ssize_t len = getrandom(buffer, count, 0); diff --git a/libc/src/__support/OSUtil/CMakeLists.txt b/libc/src/__support/OSUtil/CMakeLists.txt index 94d1042..517f888 100644 --- a/libc/src/__support/OSUtil/CMakeLists.txt +++ b/libc/src/__support/OSUtil/CMakeLists.txt @@ -15,3 +15,20 @@ add_object_library( DEPENDS ${target_os_util} ) + +if (LIBC_CONF_ENABLE_PID_CACHE) + set(libc_copt_enable_pid_cache 1) +else() + set(libc_copt_enable_pid_cache 0) +endif() + +if(TARGET libc.src.__support.OSUtil.${LIBC_TARGET_OS}.pid) + add_object_library( + pid + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.pid + COMPILE_OPTIONS + -DLIBC_COPT_ENABLE_PID_CACHE=${libc_copt_enable_pid_cache} + ) +endif() diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt index 089cad4..95a83d7 100644 --- a/libc/src/__support/OSUtil/linux/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt @@ -23,3 +23,16 @@ add_object_library( 
libc.hdr.types.struct_f_owner_ex libc.hdr.types.off_t ) + +add_object_library( + pid + SRCS + pid.cpp + HDRS + ../pid.h + DEPENDS + libc.src.__support.OSUtil.osutil + libc.src.__support.common + libc.hdr.types.pid_t + libc.include.sys_syscall +) diff --git a/libc/src/__support/OSUtil/linux/pid.cpp b/libc/src/__support/OSUtil/linux/pid.cpp new file mode 100644 index 0000000..a8499af --- /dev/null +++ b/libc/src/__support/OSUtil/linux/pid.cpp @@ -0,0 +1,20 @@ +//===------------ pid_t utilities implementation ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/OSUtil/pid.h" +#include "src/__support/OSUtil/syscall.h" +#include <sys/syscall.h> + +namespace LIBC_NAMESPACE_DECL { + +pid_t ProcessIdentity::cache = -1; +pid_t ProcessIdentity::get_uncached() { + return syscall_impl<pid_t>(SYS_getpid); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/OSUtil/pid.h b/libc/src/__support/OSUtil/pid.h new file mode 100644 index 0000000..d723abe --- /dev/null +++ b/libc/src/__support/OSUtil/pid.h @@ -0,0 +1,41 @@ +//===------------ pid_t utilities -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H +#include "hdr/types/pid_t.h" +#include "src/__support/macros/attributes.h" +#include "src/__support/macros/optimization.h" + +#ifndef LIBC_COPT_ENABLE_PID_CACHE +#define LIBC_COPT_ENABLE_PID_CACHE 1 +#endif + +namespace LIBC_NAMESPACE_DECL { + +class ProcessIdentity { + static LIBC_INLINE_VAR thread_local bool fork_inflight = true; + static pid_t cache; + static pid_t get_uncached(); + +public: + LIBC_INLINE static void start_fork() { fork_inflight = true; } + LIBC_INLINE static void end_fork() { fork_inflight = false; } + LIBC_INLINE static void refresh_cache() { cache = get_uncached(); } + LIBC_INLINE static pid_t get() { +#if LIBC_COPT_ENABLE_PID_CACHE + if (LIBC_LIKELY(!fork_inflight)) + return cache; +#endif + return get_uncached(); + } +}; + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt index d2e46b8..f1a2f16 100644 --- a/libc/src/__support/threads/CMakeLists.txt +++ b/libc/src/__support/threads/CMakeLists.txt @@ -44,6 +44,12 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.mutex) ) endif() +if (LIBC_CONF_ENABLE_TID_CACHE) + set(libc_copt_enable_tid_cache 1) +else() + set(libc_copt_enable_tid_cache 0) +endif() + add_header_library( thread_common HDRS @@ -54,6 +60,9 @@ add_header_library( libc.src.__support.CPP.optional libc.src.__support.CPP.string_view libc.src.__support.CPP.stringstream + libc.hdr.types.pid_t + COMPILE_OPTIONS + -DLIBC_COPT_ENABLE_TID_CACHE=${libc_copt_enable_tid_cache} ) if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.thread) @@ -89,3 +98,21 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.CndVar) .${LIBC_TARGET_OS}.CndVar ) endif() + 
+set(tid_dep) +if (LLVM_LIBC_FULL_BUILD) + list(APPEND tid_dep libc.src.__support.thread) +else() + list(APPEND tid_dep libc.src.__support.OSUtil.osutil) + list(APPEND tid_dep libc.include.sys_syscall) +endif() + +add_header_library( + tid + HDRS + tid.h + DEPENDS + libc.src.__support.common + libc.hdr.types.pid_t + ${tid_dep} +) diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index 8b79715..d86441d 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -55,6 +55,7 @@ add_header_library( libc.src.__support.common libc.src.__support.OSUtil.osutil libc.src.__support.CPP.limits + libc.src.__support.threads.tid COMPILE_OPTIONS -DLIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT=${LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT} ${monotonicity_flags} diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h index d2fb0ce..cae8aa6 100644 --- a/libc/src/__support/threads/linux/rwlock.h +++ b/libc/src/__support/threads/linux/rwlock.h @@ -23,6 +23,7 @@ #include "src/__support/threads/linux/futex_word.h" #include "src/__support/threads/linux/raw_mutex.h" #include "src/__support/threads/sleep.h" +#include "src/__support/threads/tid.h" #ifndef LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT #define LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT 100 @@ -336,8 +337,6 @@ private: LIBC_INLINE Role get_preference() const { return static_cast<Role>(preference); } - // TODO: use cached thread id once implemented. 
- LIBC_INLINE static pid_t gettid() { return syscall_impl<pid_t>(SYS_gettid); } template <Role role> LIBC_INLINE LockResult try_lock(RwState &old) { if constexpr (role == Role::Reader) { @@ -359,7 +358,7 @@ private: if (LIBC_LIKELY(old.compare_exchange_weak_with( state, old.set_writer_bit(), cpp::MemoryOrder::ACQUIRE, cpp::MemoryOrder::RELAXED))) { - writer_tid.store(gettid(), cpp::MemoryOrder::RELAXED); + writer_tid.store(gettid_inline(), cpp::MemoryOrder::RELAXED); return LockResult::Success; } // Notice that old is updated by the compare_exchange_weak_with @@ -394,7 +393,7 @@ private: unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) { // Phase 1: deadlock detection. // A deadlock happens if this is a RAW/WAW lock in the same thread. - if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid()) + if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid_inline()) return LockResult::Deadlock; #if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY @@ -520,7 +519,7 @@ public: if (old.has_active_writer()) { // The lock is held by a writer. // Check if we are the owner of the lock. - if (writer_tid.load(cpp::MemoryOrder::RELAXED) != gettid()) + if (writer_tid.load(cpp::MemoryOrder::RELAXED) != gettid_inline()) return LockResult::PermissionDenied; // clear writer tid. 
writer_tid.store(0, cpp::MemoryOrder::RELAXED); diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp index 36b4a88..c8ad086 100644 --- a/libc/src/__support/threads/linux/thread.cpp +++ b/libc/src/__support/threads/linux/thread.cpp @@ -518,4 +518,6 @@ void thread_exit(ThreadReturnValue retval, ThreadStyle style) { __builtin_unreachable(); } +pid_t Thread::get_uncached_tid() { return syscall_impl<pid_t>(SYS_gettid); } + } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h index ce23a88..9317452 100644 --- a/libc/src/__support/threads/thread.h +++ b/libc/src/__support/threads/thread.h @@ -9,6 +9,11 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_THREAD_H #define LLVM_LIBC_SRC___SUPPORT_THREADS_THREAD_H +#ifndef LIBC_COPT_ENABLE_TID_CACHE +#define LIBC_COPT_ENABLE_TID_CACHE 1 +#endif + +#include "hdr/types/pid_t.h" #include "src/__support/CPP/atomic.h" #include "src/__support/CPP/optional.h" #include "src/__support/CPP/string_view.h" @@ -97,13 +102,13 @@ struct alignas(STACK_ALIGNMENT) ThreadAttributes { // exits. It will clean up the thread resources once the thread // exits. cpp::Atomic<uint32_t> detach_state; - void *stack; // Pointer to the thread stack - unsigned long long stacksize; // Size of the stack - unsigned long long guardsize; // Guard size on stack - uintptr_t tls; // Address to the thread TLS memory - uintptr_t tls_size; // The size of area pointed to by |tls|. + void *stack; // Pointer to the thread stack + size_t stacksize; // Size of the stack + size_t guardsize; // Guard size on stack + uintptr_t tls; // Address to the thread TLS memory + uintptr_t tls_size; // The size of area pointed to by |tls|. 
unsigned char owned_stack; // Indicates if the thread owns this stack memory - int tid; + pid_t tid; ThreadStyle style; ThreadReturnValue retval; ThreadAtExitCallbackMgr *atexit_callback_mgr; @@ -228,6 +233,26 @@ struct Thread { // Return the name of the thread in |name|. Return the error number of error. int get_name(cpp::StringStream &name) const; + + static pid_t get_uncached_tid(); + + LIBC_INLINE void refresh_tid(pid_t cached = -1) { + if (cached >= 0) + this->attrib->tid = cached; + else + this->attrib->tid = get_uncached_tid(); + } + LIBC_INLINE void invalidate_tid() { this->attrib->tid = -1; } + + LIBC_INLINE pid_t get_tid() { +#if LIBC_COPT_ENABLE_TID_CACHE + if (LIBC_UNLIKELY(this->attrib->tid < 0)) + return get_uncached_tid(); + return this->attrib->tid; +#else + return get_uncached_tid(); +#endif + } }; extern LIBC_THREAD_LOCAL Thread self; diff --git a/libc/src/__support/threads/tid.h b/libc/src/__support/threads/tid.h new file mode 100644 index 0000000..a575cff --- /dev/null +++ b/libc/src/__support/threads/tid.h @@ -0,0 +1,34 @@ +//===--- Tid wrapper --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H +#define LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H + +// This header is for internal usage which automatically dispatches full build +// and overlay build behaviors. 
+ +#include "hdr/types/pid_t.h" +#include "src/__support/common.h" +#ifdef LIBC_FULL_BUILD +#include "src/__support/threads/thread.h" +#else +#include "src/__support/OSUtil/syscall.h" +#include <sys/syscall.h> +#endif // LIBC_FULL_BUILD + +namespace LIBC_NAMESPACE_DECL { +LIBC_INLINE pid_t gettid_inline() { +#ifdef LIBC_FULL_BUILD + return self.get_tid(); +#else + return syscall_impl<pid_t>(SYS_gettid); +#endif +} +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H diff --git a/libc/src/math/docs/add_math_function.md b/libc/src/math/docs/add_math_function.md index 9c23b8c..e9a6aad 100644 --- a/libc/src/math/docs/add_math_function.md +++ b/libc/src/math/docs/add_math_function.md @@ -18,7 +18,7 @@ together with its specifications: ``` - Add function specs to the file: ``` - libc/spec/stdc.td + libc/newhdrgen/yaml/math.yaml ``` ## Implementation diff --git a/libc/src/stdio/fopencookie.cpp b/libc/src/stdio/fopencookie.cpp index 07be9a5..9f5694e 100644 --- a/libc/src/stdio/fopencookie.cpp +++ b/libc/src/stdio/fopencookie.cpp @@ -43,16 +43,16 @@ FileIOResult CookieFile::cookie_write(File *f, const void *data, size_t size) { auto cookie_file = reinterpret_cast<CookieFile *>(f); if (cookie_file->ops.write == nullptr) return 0; - return cookie_file->ops.write(cookie_file->cookie, - reinterpret_cast<const char *>(data), size); + return static_cast<size_t>(cookie_file->ops.write( + cookie_file->cookie, reinterpret_cast<const char *>(data), size)); } FileIOResult CookieFile::cookie_read(File *f, void *data, size_t size) { auto cookie_file = reinterpret_cast<CookieFile *>(f); if (cookie_file->ops.read == nullptr) return 0; - return cookie_file->ops.read(cookie_file->cookie, - reinterpret_cast<char *>(data), size); + return static_cast<size_t>(cookie_file->ops.read( + cookie_file->cookie, reinterpret_cast<char *>(data), size)); } ErrorOr<off_t> CookieFile::cookie_seek(File *f, off_t offset, int whence) { diff --git 
a/libc/src/sys/auxv/linux/getauxval.cpp b/libc/src/sys/auxv/linux/getauxval.cpp index 2ca894d..bfa6b23 100644 --- a/libc/src/sys/auxv/linux/getauxval.cpp +++ b/libc/src/sys/auxv/linux/getauxval.cpp @@ -155,7 +155,7 @@ static void initialize_auxv_once(void) { static AuxEntry read_entry(int fd) { AuxEntry buf; - ssize_t size = sizeof(AuxEntry); + size_t size = sizeof(AuxEntry); char *ptr = reinterpret_cast<char *>(&buf); while (size > 0) { ssize_t ret = read(fd, ptr, size); diff --git a/libc/src/sys/epoll/linux/epoll_pwait2.cpp b/libc/src/sys/epoll/linux/epoll_pwait2.cpp index 14b4193..4123157 100644 --- a/libc/src/sys/epoll/linux/epoll_pwait2.cpp +++ b/libc/src/sys/epoll/linux/epoll_pwait2.cpp @@ -25,10 +25,22 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, epoll_pwait2, (int epfd, struct epoll_event *events, int maxevents, const struct timespec *timeout, const sigset_t *sigmask)) { +#ifdef SYS_epoll_pwait2 int ret = LIBC_NAMESPACE::syscall_impl<int>( SYS_epoll_pwait2, epfd, reinterpret_cast<long>(events), maxevents, reinterpret_cast<long>(timeout), reinterpret_cast<long>(sigmask), NSIG / 8); +#elif defined(SYS_epoll_pwait) + // Convert nanoseconds to milliseconds, rounding up if there are remaining + // nanoseconds + long timeout_ms = static_cast<long>(timeout->tv_sec * 1000 + + (timeout->tv_nsec + 999999) / 1000000); + int ret = LIBC_NAMESPACE::syscall_impl<int>( + SYS_epoll_pwait, epfd, reinterpret_cast<long>(events), maxevents, + timeout_ms, reinterpret_cast<long>(sigmask), NSIG / 8); +#else +#error "epoll_pwait and epoll_pwait2 syscalls not available." +#endif // A negative return value indicates an error with the magnitude of the // value being the error code. 
diff --git a/libc/src/sys/stat/linux/kernel_statx.h b/libc/src/sys/stat/linux/kernel_statx.h index f26f0b8..d0e223a 100644 --- a/libc/src/sys/stat/linux/kernel_statx.h +++ b/libc/src/sys/stat/linux/kernel_statx.h @@ -80,7 +80,7 @@ LIBC_INLINE int statx(int dirfd, const char *__restrict path, int flags, return -ret; statbuf->st_dev = MKDEV(xbuf.stx_dev_major, xbuf.stx_dev_minor); - statbuf->st_ino = xbuf.stx_ino; + statbuf->st_ino = static_cast<decltype(statbuf->st_ino)>(xbuf.stx_ino); statbuf->st_mode = xbuf.stx_mode; statbuf->st_nlink = xbuf.stx_nlink; statbuf->st_uid = xbuf.stx_uid; @@ -94,7 +94,8 @@ LIBC_INLINE int statx(int dirfd, const char *__restrict path, int flags, statbuf->st_ctim.tv_sec = xbuf.stx_ctime.tv_sec; statbuf->st_ctim.tv_nsec = xbuf.stx_ctime.tv_nsec; statbuf->st_blksize = xbuf.stx_blksize; - statbuf->st_blocks = xbuf.stx_blocks; + statbuf->st_blocks = + static_cast<decltype(statbuf->st_blocks)>(xbuf.stx_blocks); return 0; } diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt index ddafcd7..ec76712 100644 --- a/libc/src/unistd/CMakeLists.txt +++ b/libc/src/unistd/CMakeLists.txt @@ -333,3 +333,13 @@ add_entrypoint_external( add_entrypoint_external( opterr ) + +add_entrypoint_object( + gettid + SRCS + gettid.cpp + HDRS + gettid.h + DEPENDS + libc.src.__support.threads.tid +) diff --git a/libc/src/unistd/getpid.h b/libc/src/unistd/getpid.h index c3c55b0..5812df0 100644 --- a/libc/src/unistd/getpid.h +++ b/libc/src/unistd/getpid.h @@ -9,12 +9,12 @@ #ifndef LLVM_LIBC_SRC_UNISTD_GETPID_H #define LLVM_LIBC_SRC_UNISTD_GETPID_H +#include "hdr/types/pid_t.h" #include "src/__support/macros/config.h" -#include <unistd.h> namespace LIBC_NAMESPACE_DECL { -pid_t getpid(); +pid_t getpid(void); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/unistd/gettid.cpp b/libc/src/unistd/gettid.cpp new file mode 100644 index 0000000..6d8ed65 --- /dev/null +++ b/libc/src/unistd/gettid.cpp @@ -0,0 +1,17 @@ +//===-- Implementation file for 
gettid --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/unistd/gettid.h" +#include "src/__support/common.h" +#include "src/__support/threads/tid.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(pid_t, gettid, (void)) { return gettid_inline(); } + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/unistd/gettid.h b/libc/src/unistd/gettid.h new file mode 100644 index 0000000..4228319 --- /dev/null +++ b/libc/src/unistd/gettid.h @@ -0,0 +1,21 @@ +//===-- Implementation header for gettid ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_UNISTD_GETTID_H +#define LLVM_LIBC_SRC_UNISTD_GETTID_H + +#include "hdr/types/pid_t.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE_DECL { + +pid_t gettid(void); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_UNISTD_GETTID_H diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index 7e733d7..651ea60 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -101,6 +101,7 @@ add_entrypoint_object( libc.include.sys_syscall libc.src.__support.threads.fork_callbacks libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.pid libc.src.__support.threads.thread libc.src.errno.errno ) @@ -204,8 +205,7 @@ add_entrypoint_object( ../getpid.h DEPENDS libc.include.unistd - libc.include.sys_syscall - 
libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.pid ) add_entrypoint_object( diff --git a/libc/src/unistd/linux/fork.cpp b/libc/src/unistd/linux/fork.cpp index 7d47665..8fe1881 100644 --- a/libc/src/unistd/linux/fork.cpp +++ b/libc/src/unistd/linux/fork.cpp @@ -8,13 +8,14 @@ #include "src/unistd/fork.h" +#include "src/__support/OSUtil/pid.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/threads/fork_callbacks.h" #include "src/__support/threads/thread.h" // For thread self object - #include "src/errno/libc_errno.h" + #include <signal.h> // For SIGCHLD #include <sys/syscall.h> // For syscall numbers. @@ -25,6 +26,14 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(pid_t, fork, (void)) { invoke_prepare_callbacks(); + + // Invalidate tid/pid cache before fork to avoid post fork signal handler from + // getting wrong values. gettid() is not async-signal-safe, but let's provide + // our best efforts here. + pid_t parent_tid = self.get_tid(); + self.invalidate_tid(); + ProcessIdentity::start_fork(); + #ifdef SYS_fork pid_t ret = LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_fork); #elif defined(SYS_clone) @@ -32,15 +41,6 @@ LLVM_LIBC_FUNCTION(pid_t, fork, (void)) { #else #error "fork and clone syscalls not available." #endif - if (ret == 0) { - // Return value is 0 in the child process. - // The child is created with a single thread whose self object will be a - // copy of parent process' thread which called fork. So, we have to fix up - // the child process' self object with the new process' tid. - self.attrib->tid = LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_gettid); - invoke_child_callbacks(); - return 0; - } if (ret < 0) { // Error case, a child process was not created. 
@@ -48,6 +48,18 @@ LLVM_LIBC_FUNCTION(pid_t, fork, (void)) { return -1; } + // Child process + if (ret == 0) { + self.refresh_tid(); + ProcessIdentity::refresh_cache(); + ProcessIdentity::end_fork(); + invoke_child_callbacks(); + return 0; + } + + // Parent process + self.refresh_tid(parent_tid); + ProcessIdentity::end_fork(); invoke_parent_callbacks(); return ret; } diff --git a/libc/src/unistd/linux/getpid.cpp b/libc/src/unistd/linux/getpid.cpp index b24c86a..65d6c8a 100644 --- a/libc/src/unistd/linux/getpid.cpp +++ b/libc/src/unistd/linux/getpid.cpp @@ -7,17 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/unistd/getpid.h" - -#include "src/__support/OSUtil/syscall.h" // For internal syscall function. +#include "src/__support/OSUtil/pid.h" #include "src/__support/common.h" -#include "src/__support/macros/config.h" - -#include <sys/syscall.h> // For syscall numbers. - namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(pid_t, getpid, ()) { - return LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_getpid); -} +LLVM_LIBC_FUNCTION(pid_t, getpid, (void)) { return ProcessIdentity::get(); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/startup/linux/CMakeLists.txt b/libc/startup/linux/CMakeLists.txt index 336c5d0..585edf2 100644 --- a/libc/startup/linux/CMakeLists.txt +++ b/libc/startup/linux/CMakeLists.txt @@ -101,6 +101,7 @@ add_object_library( libc.include.llvm-libc-macros.link_macros libc.src.__support.threads.thread libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.pid libc.src.stdlib.exit libc.src.stdlib.atexit libc.src.unistd.environ diff --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp index 824c0e1..4047c06 100644 --- a/libc/startup/linux/do_start.cpp +++ b/libc/startup/linux/do_start.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "startup/linux/do_start.h" #include "include/llvm-libc-macros/link-macros.h" 
+#include "src/__support/OSUtil/pid.h" #include "src/__support/OSUtil/syscall.h" #include "src/__support/macros/config.h" #include "src/__support/threads/thread.h" @@ -127,6 +128,10 @@ static ThreadAttributes main_thread_attrib; if (tls.size != 0 && !set_thread_ptr(tls.tp)) syscall_impl<long>(SYS_exit, 1); + // Validate process identity cache (TLS needed). + ProcessIdentity::refresh_cache(); + ProcessIdentity::end_fork(); + self.attrib = &main_thread_attrib; main_thread_attrib.atexit_callback_mgr = internal::get_thread_atexit_callback_mgr(); diff --git a/libc/test/integration/src/unistd/CMakeLists.txt b/libc/test/integration/src/unistd/CMakeLists.txt index 3f18231..f50405d0 100644 --- a/libc/test/integration/src/unistd/CMakeLists.txt +++ b/libc/test/integration/src/unistd/CMakeLists.txt @@ -31,6 +31,10 @@ add_integration_test( libc.src.sys.wait.wait4 libc.src.sys.wait.waitpid libc.src.unistd.fork + libc.src.unistd.getpid + libc.src.unistd.gettid + libc.src.stdlib.exit + libc.include.sys_syscall ) if((${LIBC_TARGET_OS} STREQUAL "linux") AND (${LIBC_TARGET_ARCHITECTURE_IS_X86})) diff --git a/libc/test/integration/src/unistd/fork_test.cpp b/libc/test/integration/src/unistd/fork_test.cpp index 9c9213e..4b82d5f 100644 --- a/libc/test/integration/src/unistd/fork_test.cpp +++ b/libc/test/integration/src/unistd/fork_test.cpp @@ -6,17 +6,21 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/OSUtil/syscall.h" #include "src/pthread/pthread_atfork.h" #include "src/signal/raise.h" +#include "src/stdlib/exit.h" #include "src/sys/wait/wait.h" #include "src/sys/wait/wait4.h" #include "src/sys/wait/waitpid.h" #include "src/unistd/fork.h" - +#include "src/unistd/getpid.h" +#include "src/unistd/gettid.h" #include "test/IntegrationTest/test.h" #include <errno.h> #include <signal.h> +#include <sys/syscall.h> #include <sys/wait.h> #include <unistd.h> @@ -140,7 +144,25 @@ void fork_with_atfork_callbacks() { ASSERT_NE(child, 
DONE); } +void fork_pid_tid_test() { + pid_t pid = fork(); + ASSERT_TRUE(pid >= 0); + ASSERT_EQ(LIBC_NAMESPACE::gettid(), + LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_gettid)); + ASSERT_EQ(LIBC_NAMESPACE::getpid(), + LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_getpid)); + + if (pid == 0) { + LIBC_NAMESPACE::exit(0); + } else { + int status; + LIBC_NAMESPACE::waitpid(pid, &status, 0); + ASSERT_EQ(status, 0); + } +} + TEST_MAIN(int argc, char **argv, char **envp) { + fork_pid_tid_test(); fork_and_wait_normal_exit(); fork_and_wait4_normal_exit(); fork_and_waitpid_normal_exit(); diff --git a/libc/test/src/__support/File/platform_file_test.cpp b/libc/test/src/__support/File/platform_file_test.cpp index 469d750..8aa0721 100644 --- a/libc/test/src/__support/File/platform_file_test.cpp +++ b/libc/test/src/__support/File/platform_file_test.cpp @@ -103,7 +103,8 @@ TEST(LlvmLibcPlatformFileTest, CreateAppendSeekAndReadBack) { constexpr size_t APPEND_TEXT_SIZE = sizeof(APPEND_TEXT) - 1; ASSERT_EQ(file->write(APPEND_TEXT, APPEND_TEXT_SIZE).value, APPEND_TEXT_SIZE); - ASSERT_EQ(file->seek(-APPEND_TEXT_SIZE, SEEK_END).value(), 0); + ASSERT_EQ(file->seek(-static_cast<off_t>(APPEND_TEXT_SIZE), SEEK_END).value(), + 0); char data[APPEND_TEXT_SIZE + 1]; ASSERT_EQ(file->read(data, APPEND_TEXT_SIZE).value, APPEND_TEXT_SIZE); data[APPEND_TEXT_SIZE] = '\0'; diff --git a/libc/test/src/sys/epoll/linux/epoll_create_test.cpp b/libc/test/src/sys/epoll/linux/epoll_create_test.cpp index fdcdcf8..9c4bad1 100644 --- a/libc/test/src/sys/epoll/linux/epoll_create_test.cpp +++ b/libc/test/src/sys/epoll/linux/epoll_create_test.cpp @@ -10,6 +10,7 @@ #include "src/unistd/close.h" #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" +#include <sys/syscall.h> // For syscall numbers. 
using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher; @@ -21,6 +22,8 @@ TEST(LlvmLibcEpollCreateTest, Basic) { ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds()); } +#ifdef SYS_epoll_create TEST(LlvmLibcEpollCreateTest, Fails) { ASSERT_THAT(LIBC_NAMESPACE::epoll_create(0), Fails(EINVAL)); } +#endif diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt index 332455b..f829265 100644 --- a/libc/test/src/unistd/CMakeLists.txt +++ b/libc/test/src/unistd/CMakeLists.txt @@ -379,6 +379,16 @@ add_libc_unittest( ) add_libc_unittest( + gettid_test + SUITE + libc_unistd_unittests + SRCS + gettid_test.cpp + DEPENDS + libc.src.unistd.gettid +) + +add_libc_unittest( getppid_test SUITE libc_unistd_unittests diff --git a/libc/test/src/unistd/gettid_test.cpp b/libc/test/src/unistd/gettid_test.cpp new file mode 100644 index 0000000..c2330f4 --- /dev/null +++ b/libc/test/src/unistd/gettid_test.cpp @@ -0,0 +1,15 @@ +//===-- Unittests for gettid ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/unistd/gettid.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcGetTidTest, SmokeTest) { + // gettid always succeeds. So, we just call it as a smoke test. 
+ ASSERT_GT(LIBC_NAMESPACE::gettid(), 0); +} diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 119539e..ed23d22 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -108,6 +108,10 @@ void handle_printf(rpc::Server::Port &port) { if (cur_section.has_conv && cur_section.conv_name == 's' && cur_section.conv_val_ptr) { strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr); + // Get the minimum size of the string in the case of padding. + char c = '\0'; + cur_section.conv_val_ptr = &c; + convert(&writer, cur_section); } else if (cur_section.has_conv) { // Ignore conversion errors for the first pass. convert(&writer, cur_section); diff --git a/libcxx/docs/ImplementationDefinedBehavior.rst b/libcxx/docs/ImplementationDefinedBehavior.rst index 3000bb7..f0ef733 100644 --- a/libcxx/docs/ImplementationDefinedBehavior.rst +++ b/libcxx/docs/ImplementationDefinedBehavior.rst @@ -51,6 +51,17 @@ Libc++ determines that a stream is Unicode-capable terminal by: <http://eel.is/c++draft/print.fun#7>`_. This function is used for other ``std::print`` overloads that don't take an ``ostream&`` argument. +`[sf.cmath] <https://wg21.link/sf.cmath>`_ Mathematical Special Functions: Large indices +---------------------------------------------------------------------------------------- + +Most functions within the Mathematical Special Functions section contain integral indices. +The Standard specifies the result for larger indices as implementation-defined. +Libc++ pursues reasonable results by choosing the same formulas as for indices below that threshold. +E.g. 
+ +- ``std::hermite(unsigned n, T x)`` for ``n >= 128`` + + Listed in the index of implementation-defined behavior ====================================================== diff --git a/libcxx/docs/Status/Cxx17.rst b/libcxx/docs/Status/Cxx17.rst index d4426af..ad4f857 100644 --- a/libcxx/docs/Status/Cxx17.rst +++ b/libcxx/docs/Status/Cxx17.rst @@ -41,6 +41,7 @@ Paper Status .. note:: .. [#note-P0067] P0067: ``std::(to|from)_chars`` for integrals has been available since version 7.0. ``std::to_chars`` for ``float`` and ``double`` since version 14.0 ``std::to_chars`` for ``long double`` uses the implementation for ``double``. + .. [#note-P0226] P0226: Progress is tracked `here <https://libcxx.llvm.org/Status/SpecialMath.html>`_. .. [#note-P0607] P0607: The parts of P0607 that are not done are the ``<regex>`` bits. .. [#note-P0154] P0154: The required macros are only implemented as of clang 19. .. [#note-P0452] P0452: The changes to ``std::transform_inclusive_scan`` and ``std::transform_exclusive_scan`` have not yet been implemented. 
diff --git a/libcxx/docs/Status/Cxx17Papers.csv b/libcxx/docs/Status/Cxx17Papers.csv index 2e560cf..6c657d5 100644 --- a/libcxx/docs/Status/Cxx17Papers.csv +++ b/libcxx/docs/Status/Cxx17Papers.csv @@ -26,7 +26,7 @@ "`P0013R1 <https://wg21.link/p0013r1>`__","LWG","Logical type traits rev 2","Kona","|Complete|","3.8" "","","","","","" "`P0024R2 <https://wg21.link/P0024R2>`__","LWG","The Parallelism TS Should be Standardized","Jacksonville","|Partial|","" -"`P0226R1 <https://wg21.link/P0226R1>`__","LWG","Mathematical Special Functions for C++17","Jacksonville","","" +"`P0226R1 <https://wg21.link/P0226R1>`__","LWG","Mathematical Special Functions for C++17","Jacksonville","|In Progress| [#note-P0226]_","" "`P0220R1 <https://wg21.link/P0220R1>`__","LWG","Adopt Library Fundamentals V1 TS Components for C++17","Jacksonville","|Complete|","16.0" "`P0218R1 <https://wg21.link/P0218R1>`__","LWG","Adopt the File System TS for C++17","Jacksonville","|Complete|","7.0" "`P0033R1 <https://wg21.link/P0033R1>`__","LWG","Re-enabling shared_from_this","Jacksonville","|Complete|","3.9" diff --git a/libcxx/docs/Status/FormatPaper.csv b/libcxx/docs/Status/FormatPaper.csv index f29f1f7..fb96b1f 100644 --- a/libcxx/docs/Status/FormatPaper.csv +++ b/libcxx/docs/Status/FormatPaper.csv @@ -7,7 +7,7 @@ Section,Description,Dependencies,Assignee,Status,First released version `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::gps_time<Duration>``",A ``<chrono>`` implementation,Mark de Wever,,, `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::file_time<Duration>``",,Mark de Wever,|Complete|,17.0 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::local_time<Duration>``",,Mark de Wever,|Complete|,17.0 -`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::local-time-format-t<Duration>``",A ``<chrono>`` implementation,Mark de Wever,,, +`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::local-time-format-t<Duration>``",,,|Nothing To Do|, 
`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::day``",,Mark de Wever,|Complete|,16.0 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::month``",,Mark de Wever,|Complete|,16.0 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::year``",,Mark de Wever,|Complete|,16.0 @@ -26,7 +26,7 @@ Section,Description,Dependencies,Assignee,Status,First released version `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::hh_mm_ss<duration<Rep, Period>>``",,Mark de Wever,|Complete|,17.0 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::sys_info``",,Mark de Wever,|Complete|,19.0 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::local_info``",,Mark de Wever,|Complete|,19.0 -`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::zoned_time<Duration, TimeZonePtr>``",A ``<chrono>`` implementation,Mark de Wever,, +`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::zoned_time<Duration, TimeZonePtr>``",,Mark de Wever,|Complete|,19.0 "`P2693R1 <https://wg21.link/P2693R1>`__","Formatting ``thread::id`` and ``stacktrace``" `[thread.thread.id] <https://wg21.link/thread.thread.id>`_,"Formatting ``thread::id``",,Mark de Wever,|Complete|,17.0 diff --git a/libcxx/docs/Status/SpecialMath.rst b/libcxx/docs/Status/SpecialMath.rst new file mode 100644 index 0000000..fcc9f03 --- /dev/null +++ b/libcxx/docs/Status/SpecialMath.rst @@ -0,0 +1,35 @@ +.. special-math-status: + +====================================================== +libc++ Mathematical Special Functions Status (P0226R1) +====================================================== + +.. include:: ../Helpers/Styles.rst + +.. contents:: + :local: + +Overview +======== + +This document contains the status of the C++17 mathematical special functions implementation in libc++. +It is used to track both the status of the sub-projects of the effort and who is assigned to these sub-projects. +This avoids duplicating effort. 
+ +If you are interested in contributing to this effort, please send a message +to the #libcxx channel in the LLVM discord. Please *do not* start working +on any items below that have already been assigned to someone else. + +Sub-projects in the Implementation Effort +========================================= + +.. csv-table:: + :file: SpecialMathProjects.csv + :header-rows: 1 + :widths: auto + +Paper and Issue Status +====================== + +The underlying paper is `Mathematical Special Functions for C++17 (P0226) <https://wg21.link/P0226>`_ and is included in C++17. +Implementation is *In Progress*. diff --git a/libcxx/docs/Status/SpecialMathProjects.csv b/libcxx/docs/Status/SpecialMathProjects.csv new file mode 100644 index 0000000..f964e79 --- /dev/null +++ b/libcxx/docs/Status/SpecialMathProjects.csv @@ -0,0 +1,22 @@ +Section,Description,Assignee,Complete +| `[sf.cmath.assoc.laguerre] <https://wg21.link/sf.cmath.assoc.laguerre>`_, std::assoc_laguerre, None, |Not Started| +| `[sf.cmath.assoc.legendre] <https://wg21.link/sf.cmath.assoc.legendre>`_, std::assoc_legendre, None, |Not Started| +| `[sf.cmath.beta] <https://wg21.link/sf.cmath.beta>`_, std::beta, None, |Not Started| +| `[sf.cmath.comp.ellint.1] <https://wg21.link/sf.cmath.comp.ellint.1>`_, std::comp_ellint_1, None, |Not Started| +| `[sf.cmath.comp.ellint.2] <https://wg21.link/sf.cmath.comp.ellint.2>`_, std::comp_ellint_2, None, |Not Started| +| `[sf.cmath.comp.ellint.3] <https://wg21.link/sf.cmath.comp.ellint.3>`_, std::comp_ellint_3, None, |Not Started| +| `[sf.cmath.cyl.bessel.i] <https://wg21.link/sf.cmath.cyl.bessel.i>`_, std::cyl_bessel_i, None, |Not Started| +| `[sf.cmath.cyl.bessel.j] <https://wg21.link/sf.cmath.cyl.bessel.j>`_, std::cyl_bessel_j, None, |Not Started| +| `[sf.cmath.cyl.bessel.k] <https://wg21.link/sf.cmath.cyl.bessel.k>`_, std::cyl_bessel_k, None, |Not Started| +| `[sf.cmath.cyl.neumann] <https://wg21.link/sf.cmath.cyl.neumann>`_, std::cyl_neumann, None, |Not Started| +| 
`[sf.cmath.ellint.1] <https://wg21.link/sf.cmath.ellint.1>`_, std::ellint_1, None, |Not Started| +| `[sf.cmath.ellint.2] <https://wg21.link/sf.cmath.ellint.2>`_, std::ellint_2, None, |Not Started| +| `[sf.cmath.ellint.3] <https://wg21.link/sf.cmath.ellint.3>`_, std::ellint_3, None, |Not Started| +| `[sf.cmath.expint] <https://wg21.link/sf.cmath.expint>`_, std::expint, None, |Not Started| +| `[sf.cmath.hermite] <https://wg21.link/sf.cmath.hermite>`_, std::hermite, Paul Xi Cao, |Complete| +| `[sf.cmath.laguerre] <https://wg21.link/sf.cmath.laguerre>`_, std::laguerre, None, |Not Started| +| `[sf.cmath.legendre] <https://wg21.link/sf.cmath.legendre>`_, std::legendre, None, |Not Started| +| `[sf.cmath.riemann.zeta] <https://wg21.link/sf.cmath.riemann.zeta>`_, std::riemann_zeta, None, |Not Started| +| `[sf.cmath.sph.bessel] <https://wg21.link/sf.cmath.sph.bessel>`_, std::sph_bessel, None, |Not Started| +| `[sf.cmath.sph.legendre] <https://wg21.link/sf.cmath.sph.legendre>`_, std::sph_legendre, None, |Not Started| +| `[sf.cmath.sph.neumann] <https://wg21.link/sf.cmath.sph.neumann>`_, std::sph_neumann, None, |Not Started| diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index 69a9e57..4bca3cc 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -53,6 +53,7 @@ Getting Started with libc++ Status/PSTL Status/Ranges Status/Spaceship + Status/SpecialMath Status/Zip diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 1a4d9c7..3257927 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -509,6 +509,7 @@ set(files __math/remainder.h __math/roots.h __math/rounding_functions.h + __math/special_functions.h __math/traits.h __math/trigonometric_functions.h __mbstate_t.h diff --git a/libcxx/include/__chrono/convert_to_tm.h b/libcxx/include/__chrono/convert_to_tm.h index 881a497..3a51019 100644 --- a/libcxx/include/__chrono/convert_to_tm.h +++ b/libcxx/include/__chrono/convert_to_tm.h @@ -29,11 +29,13 @@ 
#include <__chrono/year_month.h> #include <__chrono/year_month_day.h> #include <__chrono/year_month_weekday.h> +#include <__chrono/zoned_time.h> #include <__concepts/same_as.h> #include <__config> #include <__format/format_error.h> #include <__memory/addressof.h> #include <__type_traits/is_convertible.h> +#include <__type_traits/is_specialization.h> #include <cstdint> #include <ctime> #include <limits> @@ -178,7 +180,13 @@ _LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(const _ChronoT& __value) { // Has no time information. } else if constexpr (same_as<_ChronoT, chrono::local_info>) { // Has no time information. -# endif +# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) + } else if constexpr (__is_specialization_v<_ChronoT, chrono::zoned_time>) { + return std::__convert_to_tm<_Tm>( + chrono::sys_time<typename _ChronoT::duration>{__value.get_local_time().time_since_epoch()}); +# endif +# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) } else static_assert(sizeof(_ChronoT) == 0, "Add the missing type specialization"); diff --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h index 9a77316..449c415 100644 --- a/libcxx/include/__chrono/formatter.h +++ b/libcxx/include/__chrono/formatter.h @@ -33,6 +33,7 @@ #include <__chrono/year_month.h> #include <__chrono/year_month_day.h> #include <__chrono/year_month_weekday.h> +#include <__chrono/zoned_time.h> #include <__concepts/arithmetic.h> #include <__concepts/same_as.h> #include <__config> @@ -44,6 +45,7 @@ #include <__format/parser_std_format_spec.h> #include <__format/write_escaped.h> #include <__memory/addressof.h> +#include <__type_traits/is_specialization.h> #include <cmath> #include <ctime> #include <limits> @@ -137,10 +139,24 @@ __format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::hh_mm_ss< __value.fractional_width); } +# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) && 
!defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && \ + !defined(_LIBCPP_HAS_NO_FILESYSTEM) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +template <class _CharT, class _Duration, class _TimeZonePtr> +_LIBCPP_HIDE_FROM_ABI void +__format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::zoned_time<_Duration, _TimeZonePtr>& __value) { + __formatter::__format_sub_seconds(__sstr, __value.get_local_time().time_since_epoch()); +} +# endif + template <class _Tp> consteval bool __use_fraction() { if constexpr (__is_time_point<_Tp>) return chrono::hh_mm_ss<typename _Tp::duration>::fractional_width; +# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && \ + !defined(_LIBCPP_HAS_NO_FILESYSTEM) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) + else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) + return chrono::hh_mm_ss<typename _Tp::duration>::fractional_width; +# endif else if constexpr (chrono::__is_duration<_Tp>::value) return chrono::hh_mm_ss<_Tp>::fractional_width; else if constexpr (__is_hh_mm_ss<_Tp>) @@ -212,8 +228,13 @@ _LIBCPP_HIDE_FROM_ABI __time_zone __convert_to_time_zone([[maybe_unused]] const # if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) if constexpr (same_as<_Tp, chrono::sys_info>) return {__value.abbrev, __value.offset}; +# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) + else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) + return __formatter::__convert_to_time_zone(__value.get_info()); +# endif else -# endif +# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) return {"UTC", chrono::seconds{0}}; } @@ -426,7 +447,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __weekday_ok(const _Tp& __value) { return true; else if constexpr (same_as<_Tp, chrono::local_info>) return true; -# endif +# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + 
!defined(_LIBCPP_HAS_NO_LOCALIZATION) + else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) + return true; +# endif +# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) else static_assert(sizeof(_Tp) == 0, "Add the missing type specialization"); } @@ -472,7 +498,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __weekday_name_ok(const _Tp& __value) { return true; else if constexpr (same_as<_Tp, chrono::local_info>) return true; -# endif +# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) + else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) + return true; +# endif +# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) else static_assert(sizeof(_Tp) == 0, "Add the missing type specialization"); } @@ -518,7 +549,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __date_ok(const _Tp& __value) { return true; else if constexpr (same_as<_Tp, chrono::local_info>) return true; -# endif +# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) + else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) + return true; +# endif +# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) else static_assert(sizeof(_Tp) == 0, "Add the missing type specialization"); } @@ -564,7 +600,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __month_name_ok(const _Tp& __value) { return true; else if constexpr (same_as<_Tp, chrono::local_info>) return true; -# endif +# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) + else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) + return true; +# endif +# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) else static_assert(sizeof(_Tp) == 0, "Add the missing type specialization"); } @@ -924,7 +965,23 @@ public: return _Base::__parse(__ctx, __format_spec::__fields_chrono, 
__format_spec::__flags{}); } }; -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) +// Note due to how libc++'s formatters are implemented there is no need to add +// the exposition only local-time-format-t abstraction. +template <class _Duration, class _TimeZonePtr, __fmt_char_type _CharT> +struct formatter<chrono::zoned_time<_Duration, _TimeZonePtr>, _CharT> : public __formatter_chrono<_CharT> { +public: + using _Base = __formatter_chrono<_CharT>; + + template <class _ParseContext> + _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { + return _Base::__parse(__ctx, __format_spec::__fields_chrono, __format_spec::__flags::__clock); + } +}; +# endif // !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && + // !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) #endif // if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__chrono/ostream.h b/libcxx/include/__chrono/ostream.h index bb0341b..e6c4325 100644 --- a/libcxx/include/__chrono/ostream.h +++ b/libcxx/include/__chrono/ostream.h @@ -27,6 +27,7 @@ #include <__chrono/year_month.h> #include <__chrono/year_month_day.h> #include <__chrono/year_month_weekday.h> +#include <__chrono/zoned_time.h> #include <__concepts/same_as.h> #include <__config> #include <__format/format_functions.h> @@ -302,6 +303,14 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const local_info& __info) { _LIBCPP_STATICALLY_WIDEN(_CharT, "{}: {{{}, {}}}"), __result(), __info.first, __info.second); } +# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) +template <class _CharT, class _Traits, class _Duration, class _TimeZonePtr> +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& 
+operator<<(basic_ostream<_CharT, _Traits>& __os, const zoned_time<_Duration, _TimeZonePtr>& __tp) { + return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%F %T %Z}"), __tp); +} +# endif # endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) } // namespace chrono diff --git a/libcxx/include/__configuration/abi.h b/libcxx/include/__configuration/abi.h index cbde788..710548d9 100644 --- a/libcxx/include/__configuration/abi.h +++ b/libcxx/include/__configuration/abi.h @@ -98,6 +98,10 @@ // and WCHAR_MAX. This ABI setting determines whether we should instead track whether the fill // value has been initialized using a separate boolean, which changes the ABI. # define _LIBCPP_ABI_IOS_ALLOW_ARBITRARY_FILL_VALUE +// Make a std::pair of trivially copyable types trivially copyable. +// While this technically doesn't change the layout of pair itself, other types may decide to programatically change +// their representation based on whether something is trivially copyable. +# define _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR #elif _LIBCPP_ABI_VERSION == 1 # if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF)) // Enable compiling copies of now inline methods into the dylib to support diff --git a/libcxx/include/__math/special_functions.h b/libcxx/include/__math/special_functions.h new file mode 100644 index 0000000..0b1c753 --- /dev/null +++ b/libcxx/include/__math/special_functions.h @@ -0,0 +1,84 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MATH_SPECIAL_FUNCTIONS_H +#define _LIBCPP___MATH_SPECIAL_FUNCTIONS_H + +#include <__config> +#include <__math/copysign.h> +#include <__math/traits.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/is_integral.h> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 17 + +template <class _Real> +_LIBCPP_HIDE_FROM_ABI _Real __hermite(unsigned __n, _Real __x) { + // The Hermite polynomial H_n(x). + // The implementation is based on the recurrence formula: H_{n+1}(x) = 2x H_n(x) - 2n H_{n-1}. + // Press, William H., et al. Numerical recipes 3rd edition: The art of scientific computing. + // Cambridge university press, 2007, p. 183. + + // NOLINTBEGIN(readability-identifier-naming) + if (__math::isnan(__x)) + return __x; + + _Real __H_0{1}; + if (__n == 0) + return __H_0; + + _Real __H_n_prev = __H_0; + _Real __H_n = 2 * __x; + for (unsigned __i = 1; __i < __n; ++__i) { + _Real __H_n_next = 2 * (__x * __H_n - __i * __H_n_prev); + __H_n_prev = __H_n; + __H_n = __H_n_next; + } + + if (!__math::isfinite(__H_n)) { + // Overflow occured. Two possible cases: + // n is odd: return infinity of the same sign as x. + // n is even: return +Inf + _Real __inf = std::numeric_limits<_Real>::infinity(); + return (__n & 1) ? __math::copysign(__inf, __x) : __inf; + } + return __H_n; + // NOLINTEND(readability-identifier-naming) +} + +inline _LIBCPP_HIDE_FROM_ABI double hermite(unsigned __n, double __x) { return std::__hermite(__n, __x); } + +inline _LIBCPP_HIDE_FROM_ABI float hermite(unsigned __n, float __x) { + // use double internally -- float is too prone to overflow! 
+ return static_cast<float>(std::hermite(__n, static_cast<double>(__x))); +} + +inline _LIBCPP_HIDE_FROM_ABI long double hermite(unsigned __n, long double __x) { return std::__hermite(__n, __x); } + +inline _LIBCPP_HIDE_FROM_ABI float hermitef(unsigned __n, float __x) { return std::hermite(__n, __x); } + +inline _LIBCPP_HIDE_FROM_ABI long double hermitel(unsigned __n, long double __x) { return std::hermite(__n, __x); } + +template <class _Integer, std::enable_if_t<std::is_integral_v<_Integer>, int> = 0> +_LIBCPP_HIDE_FROM_ABI double hermite(unsigned __n, _Integer __x) { + return std::hermite(__n, static_cast<double>(__x)); +} + +#endif // _LIBCPP_STD_VER >= 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___MATH_SPECIAL_FUNCTIONS_H diff --git a/libcxx/include/__type_traits/datasizeof.h b/libcxx/include/__type_traits/datasizeof.h index 35c1292..a27baf6 100644 --- a/libcxx/include/__type_traits/datasizeof.h +++ b/libcxx/include/__type_traits/datasizeof.h @@ -54,6 +54,7 @@ struct _FirstPaddingByte<_Tp, true> { // the use as an extension. 
_LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Winvalid-offsetof") +_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Winvalid-offsetof") template <class _Tp> inline const size_t __datasizeof_v = offsetof(_FirstPaddingByte<_Tp>, __first_padding_byte_); _LIBCPP_DIAGNOSTIC_POP diff --git a/libcxx/include/__utility/pair.h b/libcxx/include/__utility/pair.h index 0afbebc..c0002b7 100644 --- a/libcxx/include/__utility/pair.h +++ b/libcxx/include/__utility/pair.h @@ -32,6 +32,7 @@ #include <__type_traits/is_implicitly_default_constructible.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_reference.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_relocatable.h> @@ -80,6 +81,38 @@ struct _LIBCPP_TEMPLATE_VIS pair _LIBCPP_HIDE_FROM_ABI pair(pair const&) = default; _LIBCPP_HIDE_FROM_ABI pair(pair&&) = default; + // When we are requested for pair to be trivially copyable by the ABI macro, we use defaulted members + // if it is both legal to do it (i.e. no references) and we have a way to actually implement it, which requires + // the __enable_if__ attribute before C++20. +#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR + // FIXME: This should really just be a static constexpr variable. 
It's in a struct to avoid gdb printing the value + // when printing a pair + struct __has_defaulted_members { + static const bool value = !is_reference<first_type>::value && !is_reference<second_type>::value; + }; +# if _LIBCPP_STD_VER >= 20 + _LIBCPP_HIDE_FROM_ABI constexpr pair& operator=(const pair&) + requires __has_defaulted_members::value + = default; + + _LIBCPP_HIDE_FROM_ABI constexpr pair& operator=(pair&&) + requires __has_defaulted_members::value + = default; +# elif __has_attribute(__enable_if__) + _LIBCPP_HIDE_FROM_ABI pair& operator=(const pair&) + __attribute__((__enable_if__(__has_defaulted_members::value, ""))) = default; + + _LIBCPP_HIDE_FROM_ABI pair& operator=(pair&&) + __attribute__((__enable_if__(__has_defaulted_members::value, ""))) = default; +# else +# error "_LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR isn't supported with this compiler" +# endif +#else + struct __has_defaulted_members { + static const bool value = false; + }; +#endif // defined(_LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR) && __has_attribute(__enable_if__) + #ifdef _LIBCPP_CXX03_LANG _LIBCPP_HIDE_FROM_ABI pair() : first(), second() {} @@ -225,7 +258,8 @@ struct _LIBCPP_TEMPLATE_VIS pair typename __make_tuple_indices<sizeof...(_Args2) >::type()) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair& - operator=(__conditional_t<is_copy_assignable<first_type>::value && is_copy_assignable<second_type>::value, + operator=(__conditional_t<!__has_defaulted_members::value && is_copy_assignable<first_type>::value && + is_copy_assignable<second_type>::value, pair, __nat> const& __p) noexcept(is_nothrow_copy_assignable<first_type>::value && is_nothrow_copy_assignable<second_type>::value) { @@ -234,10 +268,12 @@ struct _LIBCPP_TEMPLATE_VIS pair return *this; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair& operator=( - __conditional_t<is_move_assignable<first_type>::value && is_move_assignable<second_type>::value, pair, __nat>&& - __p) 
noexcept(is_nothrow_move_assignable<first_type>::value && - is_nothrow_move_assignable<second_type>::value) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair& + operator=(__conditional_t<!__has_defaulted_members::value && is_move_assignable<first_type>::value && + is_move_assignable<second_type>::value, + pair, + __nat>&& __p) noexcept(is_nothrow_move_assignable<first_type>::value && + is_nothrow_move_assignable<second_type>::value) { first = std::forward<first_type>(__p.first); second = std::forward<second_type>(__p.second); return *this; diff --git a/libcxx/include/chrono b/libcxx/include/chrono index 7f25c76..990c415 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -799,6 +799,11 @@ template<class Duration1, class Duration2, class TimeZonePtr> bool operator==(const zoned_time<Duration1, TimeZonePtr>& x, const zoned_time<Duration2, TimeZonePtr>& y); +template<class charT, class traits, class Duration, class TimeZonePtr> // C++20 + basic_ostream<charT, traits>& + operator<<(basic_ostream<charT, traits>& os, + const zoned_time<Duration, TimeZonePtr>& t); + // [time.zone.leap], leap second support class leap_second { // C++20 public: @@ -881,6 +886,8 @@ namespace std { struct formatter<chrono::hh_mm_ss<duration<Rep, Period>>, charT>; // C++20 template<class charT> struct formatter<chrono::sys_info, charT>; // C++20 template<class charT> struct formatter<chrono::local_info, charT>; // C++20 + template<class Duration, class TimeZonePtr, class charT> // C++20 + struct formatter<chrono::zoned_time<Duration, TimeZonePtr>, charT>; } // namespace std namespace chrono { diff --git a/libcxx/include/cmath b/libcxx/include/cmath index 7a87e35..3c22604 100644 --- a/libcxx/include/cmath +++ b/libcxx/include/cmath @@ -204,6 +204,14 @@ floating_point fmin (arithmetic x, arithmetic y); float fminf(float x, float y); long double fminl(long double x, long double y); +double hermite(unsigned n, double x); // C++17 +float hermite(unsigned n, float x); // 
C++17 +long double hermite(unsigned n, long double x); // C++17 +float hermitef(unsigned n, float x); // C++17 +long double hermitel(unsigned n, long double x); // C++17 +template <class Integer> +double hermite(unsigned n, Integer x); // C++17 + floating_point hypot (arithmetic x, arithmetic y); float hypotf(float x, float y); long double hypotl(long double x, long double y); @@ -315,6 +323,7 @@ constexpr long double lerp(long double a, long double b, long double t) noexcept #include <limits> #include <version> +#include <__math/special_functions.h> #include <math.h> #ifndef _LIBCPP_MATH_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 3443fbc..13d0dce 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1485,6 +1485,7 @@ module std_private_math_modulo [system] { header "__mat module std_private_math_remainder [system] { header "__math/remainder.h" } module std_private_math_roots [system] { header "__math/roots.h" } module std_private_math_rounding_functions [system] { header "__math/rounding_functions.h" } +module std_private_math_special_functions [system] { header "__math/special_functions.h" } module std_private_math_traits [system] { header "__math/traits.h" } module std_private_math_trigonometric_functions [system] { header "__math/trigonometric_functions.h" } diff --git a/libcxx/include/vector b/libcxx/include/vector index aaf51d1..4598004 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -1443,7 +1443,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::shrink_to_fit() _NOE #endif // _LIBCPP_HAS_NO_EXCEPTIONS allocator_type& __a = this->__alloc(); __split_buffer<value_type, allocator_type&> __v(size(), size(), __a); - __swap_out_circular_buffer(__v); + // The Standard mandates shrink_to_fit() does not increase the capacity. + // With equal capacity keep the existing buffer. This avoids extra work + // due to swapping the elements. 
+ if (__v.capacity() < capacity()) + __swap_out_circular_buffer(__v); #ifndef _LIBCPP_HAS_NO_EXCEPTIONS } catch (...) { } diff --git a/libcxx/modules/std/cmath.inc b/libcxx/modules/std/cmath.inc index a463c1e..fe8ac77 100644 --- a/libcxx/modules/std/cmath.inc +++ b/libcxx/modules/std/cmath.inc @@ -334,12 +334,14 @@ export namespace std { using std::expint; using std::expintf; using std::expintl; +#endif // [sf.cmath.hermite], Hermite polynomials using std::hermite; using std::hermitef; using std::hermitel; +#if 0 // [sf.cmath.laguerre], Laguerre polynomials using std::laguerre; using std::laguerref; diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivial_copy_move.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivial_copy_move.pass.cpp index 3ec60c0..5481ba4 100644 --- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivial_copy_move.pass.cpp +++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivial_copy_move.pass.cpp @@ -162,8 +162,13 @@ void test_trivial() static_assert(!std::is_trivially_copy_constructible<P>::value, ""); static_assert(!std::is_trivially_move_constructible<P>::value, ""); #endif // TEST_STD_VER >= 11 +#ifndef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR static_assert(!std::is_trivially_copy_assignable<P>::value, ""); static_assert(!std::is_trivially_move_assignable<P>::value, ""); +#else + static_assert(std::is_trivially_copy_assignable<P>::value, ""); + static_assert(std::is_trivially_move_assignable<P>::value, ""); +#endif static_assert(std::is_trivially_destructible<P>::value, ""); } } diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivially_copyable.compile.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivially_copyable.compile.pass.cpp index 1132b3e..c5f9c8d 100644 --- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivially_copyable.compile.pass.cpp +++ 
b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivially_copyable.compile.pass.cpp @@ -47,11 +47,20 @@ static_assert(!std::is_trivially_copyable<std::pair<int&, int> >::value, ""); static_assert(!std::is_trivially_copyable<std::pair<int, int&> >::value, ""); static_assert(!std::is_trivially_copyable<std::pair<int&, int&> >::value, ""); +#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR +static_assert(std::is_trivially_copyable<std::pair<int, int> >::value, ""); +static_assert(std::is_trivially_copyable<std::pair<int, char> >::value, ""); +static_assert(std::is_trivially_copyable<std::pair<char, int> >::value, ""); +static_assert(std::is_trivially_copyable<std::pair<std::pair<char, char>, int> >::value, ""); +static_assert(std::is_trivially_copyable<std::pair<trivially_copyable, int> >::value, ""); +#else static_assert(!std::is_trivially_copyable<std::pair<int, int> >::value, ""); static_assert(!std::is_trivially_copyable<std::pair<int, char> >::value, ""); static_assert(!std::is_trivially_copyable<std::pair<char, int> >::value, ""); static_assert(!std::is_trivially_copyable<std::pair<std::pair<char, char>, int> >::value, ""); static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable, int> >::value, ""); +#endif // _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR + #if TEST_STD_VER == 03 // Known ABI difference static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable_no_copy_assignment, int> >::value, ""); static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable_no_move_assignment, int> >::value, ""); @@ -59,10 +68,21 @@ static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable_no_move_a static_assert(std::is_trivially_copyable<std::pair<trivially_copyable_no_copy_assignment, int> >::value, ""); static_assert(std::is_trivially_copyable<std::pair<trivially_copyable_no_move_assignment, int> >::value, ""); #endif + +#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR 
+static_assert(std::is_trivially_copyable<std::pair<trivially_copyable_no_construction, int> >::value, ""); +#else static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable_no_construction, int> >::value, ""); +#endif static_assert(std::is_trivially_copy_constructible<std::pair<int, int> >::value, ""); static_assert(std::is_trivially_move_constructible<std::pair<int, int> >::value, ""); +static_assert(std::is_trivially_destructible<std::pair<int, int> >::value, ""); + +#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR +static_assert(std::is_trivially_copy_assignable<std::pair<int, int> >::value, ""); +static_assert(std::is_trivially_move_assignable<std::pair<int, int> >::value, ""); +#else static_assert(!std::is_trivially_copy_assignable<std::pair<int, int> >::value, ""); static_assert(!std::is_trivially_move_assignable<std::pair<int, int> >::value, ""); -static_assert(std::is_trivially_destructible<std::pair<int, int> >::value, ""); +#endif // _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp index 8851e2a..e39afb2 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp @@ -71,11 +71,56 @@ TEST_CONSTEXPR_CXX20 bool tests() { return true; } +#if TEST_STD_VER >= 23 +template <typename T> +struct increasing_allocator { + using value_type = T; + std::size_t min_elements = 1000; + increasing_allocator() = default; + + template <typename U> + constexpr increasing_allocator(const increasing_allocator<U>& other) noexcept : min_elements(other.min_elements) {} + + constexpr std::allocation_result<T*> allocate_at_least(std::size_t n) { + if (n < min_elements) + n = min_elements; + min_elements += 1000; + return std::allocator<T>{}.allocate_at_least(n); + } + constexpr T* 
allocate(std::size_t n) { return allocate_at_least(n).ptr; } + constexpr void deallocate(T* p, std::size_t n) noexcept { std::allocator<T>{}.deallocate(p, n); } +}; + +template <typename T, typename U> +bool operator==(increasing_allocator<T>, increasing_allocator<U>) { + return true; +} + +// https://github.com/llvm/llvm-project/issues/95161 +constexpr bool test_increasing_allocator() { + std::vector<int, increasing_allocator<int>> v; + v.push_back(1); + assert(is_contiguous_container_asan_correct(v)); + std::size_t capacity = v.capacity(); + v.shrink_to_fit(); + assert(v.capacity() <= capacity); + assert(v.size() == 1); + assert(is_contiguous_container_asan_correct(v)); + + return true; +} +#endif // TEST_STD_VER >= 23 + int main(int, char**) { - tests(); + tests(); #if TEST_STD_VER > 17 static_assert(tests()); #endif +#if TEST_STD_VER >= 23 + test_increasing_allocator(); + static_assert(test_increasing_allocator()); +#endif + return 0; } diff --git a/libcxx/test/std/numerics/c.math/hermite.pass.cpp b/libcxx/test/std/numerics/c.math/hermite.pass.cpp new file mode 100644 index 0000000..08fbd5c --- /dev/null +++ b/libcxx/test/std/numerics/c.math/hermite.pass.cpp @@ -0,0 +1,341 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 + +// <cmath> + +// double hermite(unsigned n, double x); +// float hermite(unsigned n, float x); +// long double hermite(unsigned n, long double x); +// float hermitef(unsigned n, float x); +// long double hermitel(unsigned n, long double x); +// template <class Integer> +// double hermite(unsigned n, Integer x); + +#include <array> +#include <cassert> +#include <cmath> +#include <limits> +#include <vector> + +#include "type_algorithms.h" + +inline constexpr unsigned g_max_n = 128; + +template <class T> +std::array<T, 11> sample_points() { + return {-12.34, -7.42, -1.0, -0.5, -0.1, 0.0, 0.1, 0.5, 1.0, 5.67, 15.67}; +} + +template <class Real> +class CompareFloatingValues { +private: + Real abs_tol; + Real rel_tol; + +public: + CompareFloatingValues() { + abs_tol = []() -> Real { + if (std::is_same_v<Real, float>) + return 1e-5f; + else if (std::is_same_v<Real, double>) + return 1e-11; + else + return 1e-12l; + }(); + + rel_tol = abs_tol; + } + + bool operator()(Real result, Real expected) const { + if (std::isinf(expected) && std::isinf(result)) + return result == expected; + + if (std::isnan(expected) || std::isnan(result)) + return false; + + Real tol = abs_tol + std::abs(expected) * rel_tol; + return std::abs(result - expected) < tol; + } +}; + +// Roots are taken from +// Salzer, Herbert E., Ruth Zucker, and Ruth Capuano. +// Table of the zeros and weight factors of the first twenty Hermite +// polynomials. US Government Printing Office, 1952. 
+template <class T> +std::vector<T> get_roots(unsigned n) { + switch (n) { + case 0: + return {}; + case 1: + return {T(0)}; + case 2: + return {T(0.707106781186548)}; + case 3: + return {T(0), T(1.224744871391589)}; + case 4: + return {T(0.524647623275290), T(1.650680123885785)}; + case 5: + return {T(0), T(0.958572464613819), T(2.020182870456086)}; + case 6: + return {T(0.436077411927617), T(1.335849074013697), T(2.350604973674492)}; + case 7: + return {T(0), T(0.816287882858965), T(1.673551628767471), T(2.651961356835233)}; + case 8: + return {T(0.381186990207322), T(1.157193712446780), T(1.981656756695843), T(2.930637420257244)}; + case 9: + return {T(0), T(0.723551018752838), T(1.468553289216668), T(2.266580584531843), T(3.190993201781528)}; + case 10: + return { + T(0.342901327223705), T(1.036610829789514), T(1.756683649299882), T(2.532731674232790), T(3.436159118837738)}; + case 11: + return {T(0), + T(0.65680956682100), + T(1.326557084494933), + T(2.025948015825755), + T(2.783290099781652), + T(3.668470846559583)}; + + case 12: + return {T(0.314240376254359), + T(0.947788391240164), + T(1.597682635152605), + T(2.279507080501060), + T(3.020637025120890), + T(3.889724897869782)}; + + case 13: + return {T(0), + T(0.605763879171060), + T(1.220055036590748), + T(1.853107651601512), + T(2.519735685678238), + T(3.246608978372410), + T(4.101337596178640)}; + + case 14: + return {T(0.29174551067256), + T(0.87871378732940), + T(1.47668273114114), + T(2.09518325850772), + T(2.74847072498540), + T(3.46265693360227), + T(4.30444857047363)}; + + case 15: + return {T(0.00000000000000), + T(0.56506958325558), + T(1.13611558521092), + T(1.71999257518649), + T(2.32573248617386), + T(2.96716692790560), + T(3.66995037340445), + T(4.49999070730939)}; + + case 16: + return {T(0.27348104613815), + T(0.82295144914466), + T(1.38025853919888), + T(1.95178799091625), + T(2.54620215784748), + T(3.17699916197996), + T(3.86944790486012), + T(4.68873893930582)}; + + case 17: + return 
{T(0), + T(0.5316330013427), + T(1.0676487257435), + T(1.6129243142212), + T(2.1735028266666), + T(2.7577629157039), + T(3.3789320911415), + T(4.0619466758755), + T(4.8713451936744)}; + + case 18: + return {T(0.2582677505191), + T(0.7766829192674), + T(1.3009208583896), + T(1.8355316042616), + T(2.3862990891667), + T(2.9613775055316), + T(3.5737690684863), + T(4.2481178735681), + T(5.0483640088745)}; + + case 19: + return {T(0), + T(0.5035201634239), + T(1.0103683871343), + T(1.5241706193935), + T(2.0492317098506), + T(2.5911337897945), + T(3.1578488183476), + T(3.7621873519640), + T(4.4285328066038), + T(5.2202716905375)}; + + case 20: + return {T(0.2453407083009), + T(0.7374737285454), + T(1.2340762153953), + T(1.7385377121166), + T(2.2549740020893), + T(2.7888060584281), + T(3.347854567332), + T(3.9447640401156), + T(4.6036824495507), + T(5.3874808900112)}; + + default: // polynom degree n>20 is unsupported + assert(false); + return {T(-42)}; + } +} + +template <class Real> +void test() { + { // checks if NaNs are reported correctly (i.e. output == input for input == NaN) + using nl = std::numeric_limits<Real>; + for (Real NaN : {nl::quiet_NaN(), nl::signaling_NaN()}) + for (unsigned n = 0; n < g_max_n; ++n) + assert(std::isnan(std::hermite(n, NaN))); + } + + { // simple sample points for n=0..127 should not produce NaNs. 
+ for (Real x : sample_points<Real>()) + for (unsigned n = 0; n < g_max_n; ++n) + assert(!std::isnan(std::hermite(n, x))); + } + + { // checks std::hermite(n, x) for n=0..5 against analytic polynoms + const auto h0 = [](Real) -> Real { return 1; }; + const auto h1 = [](Real y) -> Real { return 2 * y; }; + const auto h2 = [](Real y) -> Real { return 4 * y * y - 2; }; + const auto h3 = [](Real y) -> Real { return y * (8 * y * y - 12); }; + const auto h4 = [](Real y) -> Real { return (16 * std::pow(y, 4) - 48 * y * y + 12); }; + const auto h5 = [](Real y) -> Real { return y * (32 * std::pow(y, 4) - 160 * y * y + 120); }; + + for (Real x : sample_points<Real>()) { + const CompareFloatingValues<Real> compare; + assert(compare(std::hermite(0, x), h0(x))); + assert(compare(std::hermite(1, x), h1(x))); + assert(compare(std::hermite(2, x), h2(x))); + assert(compare(std::hermite(3, x), h3(x))); + assert(compare(std::hermite(4, x), h4(x))); + assert(compare(std::hermite(5, x), h5(x))); + } + } + + { // checks std::hermitef for bitwise equality with std::hermite(unsigned, float) + if constexpr (std::is_same_v<Real, float>) + for (unsigned n = 0; n < g_max_n; ++n) + for (float x : sample_points<float>()) + assert(std::hermite(n, x) == std::hermitef(n, x)); + } + + { // checks std::hermitel for bitwise equality with std::hermite(unsigned, long double) + if constexpr (std::is_same_v<Real, long double>) + for (unsigned n = 0; n < g_max_n; ++n) + for (long double x : sample_points<long double>()) + assert(std::hermite(n, x) == std::hermitel(n, x)); + } + + { // Checks if the characteristic recurrence relation holds: H_{n+1}(x) = 2x H_n(x) - 2n H_{n-1}(x) + for (Real x : sample_points<Real>()) { + for (unsigned n = 1; n < g_max_n - 1; ++n) { + Real H_next = std::hermite(n + 1, x); + Real H_next_recurrence = 2 * (x * std::hermite(n, x) - n * std::hermite(n - 1, x)); + + if (std::isinf(H_next)) + break; + const CompareFloatingValues<Real> compare; + assert(compare(H_next, 
H_next_recurrence)); + } + } + } + + { // sanity checks: hermite polynoms need to change signs at (simple) roots. checked upto order n<=20. + + // root tolerance: must be smaller than the smallest difference between adjacent roots + Real tol = []() -> Real { + if (std::is_same_v<Real, float>) + return 1e-5f; + else if (std::is_same_v<Real, double>) + return 1e-9; + else + return 1e-10l; + }(); + + const auto is_sign_change = [tol](unsigned n, Real x) -> bool { + return std::hermite(n, x - tol) * std::hermite(n, x + tol) < 0; + }; + + for (unsigned n = 0; n <= 20u; ++n) { + for (Real x : get_roots<Real>(n)) { + // the roots are symmetric: if x is a root, so is -x + if (x > 0) + assert(is_sign_change(n, -x)); + assert(is_sign_change(n, x)); + } + } + } + + { // check input infinity is handled correctly + Real inf = std::numeric_limits<Real>::infinity(); + for (unsigned n = 1; n < g_max_n; ++n) { + assert(std::hermite(n, +inf) == inf); + assert(std::hermite(n, -inf) == ((n & 1) ? -inf : inf)); + } + } + + { // check: if overflow occurs that it is mapped to the correct infinity + if constexpr (std::is_same_v<Real, double>) { + // Q: Why only double? + // A: The numeric values (e.g. overflow threshold `n`) below are different for other types. + static_assert(sizeof(double) == 8); + for (unsigned n = 0; n < g_max_n; ++n) { + // Q: Why n=111 and x=300? + // A: Both are chosen s.t. the first overlow occurs for some `n<g_max_n`. + if (n < 111) { + assert(std::isfinite(std::hermite(n, +300.0))); + assert(std::isfinite(std::hermite(n, -300.0))); + } else { + double inf = std::numeric_limits<double>::infinity(); + assert(std::hermite(n, +300.0) == inf); + assert(std::hermite(n, -300.0) == ((n & 1) ? 
-inf : inf)); + } + } + } + } +} + +struct TestFloat { + template <class Real> + void operator()() { + test<Real>(); + } +}; + +struct TestInt { + template <class Integer> + void operator()() { + // checks that std::hermite(unsigned, Integer) actually wraps std::hermite(unsigned, double) + for (unsigned n = 0; n < g_max_n; ++n) + for (Integer x : {-42, -7, -5, -1, 0, 1, 5, 7, 42}) + assert(std::hermite(n, x) == std::hermite(n, static_cast<double>(x))); + } +}; + +int main() { + types::for_each(types::floating_point_types(), TestFloat()); + types::for_each(types::type_list<short, int, long, long long>(), TestInt()); +} diff --git a/libcxx/test/std/time/time.syn/formatter.zoned_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.zoned_time.pass.cpp new file mode 100644 index 0000000..1e366ac --- /dev/null +++ b/libcxx/test/std/time/time.syn/formatter.zoned_time.pass.cpp @@ -0,0 +1,974 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// TODO FMT This test should not require std::to_chars(floating-point) +// XFAIL: availability-fp_to_chars-missing + +// XFAIL: libcpp-has-no-experimental-tzdb + +// REQUIRES: locale.fr_FR.UTF-8 +// REQUIRES: locale.ja_JP.UTF-8 + +// <chrono> +// +// template<class Duration, class TimeZonePtr, class charT> +// struct formatter<chrono::zoned_time<Duration, TimeZonePtr>, charT> + +#include <chrono> +#include <format> + +#include <cassert> +#include <concepts> +#include <locale> +#include <iostream> +#include <type_traits> + +#include "formatter_tests.h" +#include "make_string.h" +#include "platform_support.h" // locale name macros +#include "test_macros.h" + +template <class CharT> +static void test_no_chrono_specs() { + using namespace std::literals::chrono_literals; + + check(SV("1970-01-01 01:00:00.000000042 +01"), + SV("{}"), + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::nanoseconds>{42ns})); + check(SV("1970-01-01 01:00:00.000042 +01"), + SV("{}"), + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::microseconds>{42us})); + check(SV("1970-01-01 01:00:00.042 +01"), + SV("{}"), + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::milliseconds>{42ms})); + check(SV("1970-01-01 01:00:42 +01"), + SV("{}"), + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::seconds>{42s})); + check(SV("1970-02-12 01:00:00 +01"), + SV("{}"), + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::days>{std::chrono::days{42}})); + check(SV("1970-10-22 01:00:00 +01"), + SV("{}"), + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::weeks>{std::chrono::weeks{42}})); +} + +template <class CharT> +static void 
test_valid_values_year() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = + SV("{:%%C='%C'%t%%EC='%EC'%t%%y='%y'%t%%Oy='%Oy'%t%%Ey='%Ey'%t%%Y='%Y'%t%%EY='%EY'%n}"); + constexpr std::basic_string_view<CharT> lfmt = + SV("{:L%%C='%C'%t%%EC='%EC'%t%%y='%y'%t%%Oy='%Oy'%t%%Ey='%Ey'%t%%Y='%Y'%t%%EY='%EY'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%C='19'\t%EC='19'\t%y='70'\t%Oy='70'\t%Ey='70'\t%Y='1970'\t%EY='1970'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%C='20'\t%EC='20'\t%y='09'\t%Oy='09'\t%Ey='09'\t%Y='2009'\t%EY='2009'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use the global locale (fr_FR) + check(SV("%C='19'\t%EC='19'\t%y='70'\t%Oy='70'\t%Ey='70'\t%Y='1970'\t%EY='1970'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%C='20'\t%EC='20'\t%y='09'\t%Oy='09'\t%Ey='09'\t%Y='2009'\t%EY='2009'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use supplied locale (ja_JP). This locale has a different alternate. 
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + + check(loc, + SV("%C='19'\t%EC='19'\t%y='70'\t%Oy='70'\t%Ey='70'\t%Y='1970'\t%EY='1970'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%C='20'\t%EC='20'\t%y='09'\t%Oy='09'\t%Ey='09'\t%Y='2009'\t%EY='2009'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#else // defined(_WIN32) || defined(__APPLE__) || defined(_AIX)|| defined(__FreeBSD__) + + check(loc, + SV("%C='19'\t%EC='昭和'\t%y='70'\t%Oy='七十'\t%Ey='45'\t%Y='1970'\t%EY='昭和45年'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%C='20'\t%EC='平成'\t%y='09'\t%Oy='九'\t%Ey='21'\t%Y='2009'\t%EY='平成21年'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX)|| defined(__FreeBSD__) + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_month() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV("{:%%b='%b'%t%%h='%h'%t%%B='%B'%t%%m='%m'%t%%Om='%Om'%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%b='%b'%t%%h='%h'%t%%B='%B'%t%%m='%m'%t%%Om='%Om'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%b='Jan'\t%h='Jan'\t%B='January'\t%m='01'\t%Om='01'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%b='May'\t%h='May'\t%B='May'\t%m='05'\t%Om='05'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 
May 2033 + + // Use the global locale (fr_FR) +#if defined(__APPLE__) + check(SV("%b='jan'\t%h='jan'\t%B='janvier'\t%m='01'\t%Om='01'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 +#else + check(SV("%b='janv.'\t%h='janv.'\t%B='janvier'\t%m='01'\t%Om='01'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 +#endif + + check(SV("%b='mai'\t%h='mai'\t%B='mai'\t%m='05'\t%Om='05'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 + + // Use supplied locale (ja_JP). This locale has a different alternate. +#ifdef _WIN32 + check(loc, + SV("%b='1'\t%h='1'\t%B='1月'\t%m='01'\t%Om='01'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%b='5'\t%h='5'\t%B='5月'\t%m='05'\t%Om='05'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 +#elif defined(_AIX) // _WIN32 + check(loc, + SV("%b='1月'\t%h='1月'\t%B='1月'\t%m='01'\t%Om='01'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%b='5月'\t%h='5月'\t%B='5月'\t%m='05'\t%Om='05'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 +#elif defined(__APPLE__) // _WIN32 + check(loc, + SV("%b=' 1'\t%h=' 1'\t%B='1月'\t%m='01'\t%Om='01'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%b=' 5'\t%h=' 5'\t%B='5月'\t%m='05'\t%Om='05'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 +#elif defined(__FreeBSD__) // _WIN32 + check(loc, + SV("%b=' 1月'\t%h=' 1月'\t%B='1月'\t%m='01'\t%Om='01'\n"), + lfmt, 
+ std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%b=' 5月'\t%h=' 5月'\t%B='5月'\t%m='05'\t%Om='05'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 +#else // _WIN32 + check(loc, + SV("%b=' 1月'\t%h=' 1月'\t%B='1月'\t%m='01'\t%Om='一'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%b=' 5月'\t%h=' 5月'\t%B='5月'\t%m='05'\t%Om='五'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 +#endif // _WIN32 + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_day() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV("{:%%d='%d'%t%%Od='%Od'%t%%e='%e'%t%%Oe='%Oe'%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%d='%d'%t%%Od='%Od'%t%%e='%e'%t%%Oe='%Oe'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%d='01'\t%Od='01'\t%e=' 1'\t%Oe=' 1'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%d='13'\t%Od='13'\t%e='13'\t%Oe='13'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use the global locale (fr_FR) + check(SV("%d='01'\t%Od='01'\t%e=' 1'\t%Oe=' 1'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%d='13'\t%Od='13'\t%e='13'\t%Oe='13'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use supplied locale (ja_JP). 
This locale has a different alternate. +#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%d='01'\t%Od='01'\t%e=' 1'\t%Oe=' 1'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%d='13'\t%Od='13'\t%e='13'\t%Oe='13'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#else // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%d='01'\t%Od='一'\t%e=' 1'\t%Oe='一'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%d='13'\t%Od='十三'\t%e='13'\t%Oe='十三'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + +#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_weekday() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = + SV("{:%%a='%a'%t%%A='%A'%t%%u='%u'%t%%Ou='%Ou'%t%%w='%w'%t%%Ow='%Ow'%n}"); + constexpr std::basic_string_view<CharT> lfmt = + SV("{:L%%a='%a'%t%%A='%A'%t%%u='%u'%t%%Ou='%Ou'%t%%w='%w'%t%%Ow='%Ow'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%a='Thu'\t%A='Thursday'\t%u='4'\t%Ou='4'\t%w='4'\t%Ow='4'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%a='Sun'\t%A='Sunday'\t%u='7'\t%Ou='7'\t%w='0'\t%Ow='0'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106 + + // Use the global locale (fr_FR) +#if 
defined(__APPLE__) + check(SV("%a='Jeu'\t%A='Jeudi'\t%u='4'\t%Ou='4'\t%w='4'\t%Ow='4'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%a='Dim'\t%A='Dimanche'\t%u='7'\t%Ou='7'\t%w='0'\t%Ow='0'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106 +#else + check(SV("%a='jeu.'\t%A='jeudi'\t%u='4'\t%Ou='4'\t%w='4'\t%Ow='4'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%a='dim.'\t%A='dimanche'\t%u='7'\t%Ou='7'\t%w='0'\t%Ow='0'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106 +#endif + + // Use supplied locale (ja_JP). + // This locale has a different alternate, but not on all platforms +#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%a='木'\t%A='木曜日'\t%u='4'\t%Ou='4'\t%w='4'\t%Ow='4'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%a='日'\t%A='日曜日'\t%u='7'\t%Ou='7'\t%w='0'\t%Ow='0'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106 +#else // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%a='木'\t%A='木曜日'\t%u='4'\t%Ou='四'\t%w='4'\t%Ow='四'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%a='日'\t%A='日曜日'\t%u='7'\t%Ou='七'\t%w='0'\t%Ow='〇'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106 +#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + + std::locale::global(std::locale::classic()); +} + +template <class CharT> 
+static void test_valid_values_day_of_year() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV("{:%%j='%j'%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%j='%j'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%j='001'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + check(SV("%j='138'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 + + // Use the global locale (fr_FR) + check(SV("%j='001'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + check(SV("%j='138'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 + + // Use supplied locale (ja_JP). This locale has a different alternate. 
+ check(loc, + SV("%j='001'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%j='138'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_week() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV("{:%%U='%U'%t%%OU='%OU'%t%%W='%W'%t%%OW='%OW'%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%U='%U'%t%%OU='%OU'%t%%W='%W'%t%%OW='%OW'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%U='00'\t%OU='00'\t%W='00'\t%OW='00'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%U='20'\t%OU='20'\t%W='20'\t%OW='20'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 + + // Use the global locale (fr_FR) + check(SV("%U='00'\t%OU='00'\t%W='00'\t%OW='00'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%U='20'\t%OU='20'\t%W='20'\t%OW='20'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 + + // Use supplied locale (ja_JP). This locale has a different alternate. 
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%U='00'\t%OU='00'\t%W='00'\t%OW='00'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%U='20'\t%OU='20'\t%W='20'\t%OW='20'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 +#else // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%U='00'\t%OU='〇'\t%W='00'\t%OW='〇'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%U='20'\t%OU='二十'\t%W='20'\t%OW='二十'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033 +#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_iso_8601_week() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV("{:%%g='%g'%t%%G='%G'%t%%V='%V'%t%%OV='%OV'%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%g='%g'%t%%G='%G'%t%%V='%V'%t%%OV='%OV'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%g='70'\t%G='1970'\t%V='01'\t%OV='01'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%g='09'\t%G='2009'\t%V='07'\t%OV='07'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use the global locale (fr_FR) + check(SV("%g='70'\t%G='1970'\t%V='01'\t%OV='01'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 
UTC Thursday, 1 January 1970 + + check(SV("%g='09'\t%G='2009'\t%V='07'\t%OV='07'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use supplied locale (ja_JP). This locale has a different alternate. +#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%g='70'\t%G='1970'\t%V='01'\t%OV='01'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%g='09'\t%G='2009'\t%V='07'\t%OV='07'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#else // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%g='70'\t%G='1970'\t%V='01'\t%OV='一'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%g='09'\t%G='2009'\t%V='07'\t%OV='七'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_date() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV("{:%%D='%D'%t%%F='%F'%t%%x='%x'%t%%Ex='%Ex'%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%D='%D'%t%%F='%F'%t%%x='%x'%t%%Ex='%Ex'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%D='01/01/70'\t%F='1970-01-01'\t%x='01/01/70'\t%Ex='01/01/70'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + 
check(SV("%D='02/13/09'\t%F='2009-02-13'\t%x='02/13/09'\t%Ex='02/13/09'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use the global locale (fr_FR) +#if defined(__APPLE__) || defined(__FreeBSD__) + check(SV("%D='01/01/70'\t%F='1970-01-01'\t%x='01.01.1970'\t%Ex='01.01.1970'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%D='02/13/09'\t%F='2009-02-13'\t%x='13.02.2009'\t%Ex='13.02.2009'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#else + check(SV("%D='01/01/70'\t%F='1970-01-01'\t%x='01/01/1970'\t%Ex='01/01/1970'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%D='02/13/09'\t%F='2009-02-13'\t%x='13/02/2009'\t%Ex='13/02/2009'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#endif + + // Use supplied locale (ja_JP). This locale has a different alternate. 
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%D='01/01/70'\t%F='1970-01-01'\t%x='1970/01/01'\t%Ex='1970/01/01'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%D='02/13/09'\t%F='2009-02-13'\t%x='2009/02/13'\t%Ex='2009/02/13'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#else // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + check(loc, + SV("%D='01/01/70'\t%F='1970-01-01'\t%x='1970年01月01日'\t%Ex='昭和45年01月01日'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%D='02/13/09'\t%F='2009-02-13'\t%x='2009年02月13日'\t%Ex='平成21年02月13日'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__) + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_time() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV( + "{:" + "%%H='%H'%t" + "%%OH='%OH'%t" + "%%I='%I'%t" + "%%OI='%OI'%t" + "%%M='%M'%t" + "%%OM='%OM'%t" + "%%S='%S'%t" + "%%OS='%OS'%t" + "%%p='%p'%t" + "%%R='%R'%t" + "%%T='%T'%t" + "%%r='%r'%t" + "%%X='%X'%t" + "%%EX='%EX'%t" + "%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV( + "{:L" + "%%H='%H'%t" + "%%OH='%OH'%t" + "%%I='%I'%t" + "%%OI='%OI'%t" + "%%M='%M'%t" + "%%OM='%OM'%t" + "%%S='%S'%t" + "%%OS='%OS'%t" + "%%p='%p'%t" + "%%R='%R'%t" + "%%T='%T'%t" + "%%r='%r'%t" + "%%X='%X'%t" + "%%EX='%EX'%t" + "%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + 
check(SV("%H='00'\t" + "%OH='00'\t" + "%I='12'\t" + "%OI='12'\t" + "%M='00'\t" + "%OM='00'\t" + "%S='00'\t" + "%OS='00'\t" + "%p='AM'\t" + "%R='00:00'\t" + "%T='00:00:00'\t" + "%r='12:00:00 AM'\t" + "%X='00:00:00'\t" + "%EX='00:00:00'\t" + "\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%H='23'\t" + "%OH='23'\t" + "%I='11'\t" + "%OI='11'\t" + "%M='31'\t" + "%OM='31'\t" + "%S='30.123'\t" + "%OS='30.123'\t" + "%p='PM'\t" + "%R='23:31'\t" + "%T='23:31:30.123'\t" + "%r='11:31:30 PM'\t" + "%X='23:31:30'\t" + "%EX='23:31:30'\t" + "\n"), + fmt, + std::chrono::sys_time<std::chrono::milliseconds>( + 1'234'567'890'123ms)); // 23:31:30 UTC on Friday, 13 February 2009 + // Use the global locale (fr_FR) + check(SV("%H='00'\t" + "%OH='00'\t" + "%I='12'\t" + "%OI='12'\t" + "%M='00'\t" + "%OM='00'\t" + "%S='00'\t" + "%OS='00'\t" +#if defined(_AIX) + "%p='AM'\t" +#else + "%p=''\t" +#endif + "%R='00:00'\t" + "%T='00:00:00'\t" +#ifdef _WIN32 + "%r='00:00:00'\t" +#elif defined(_AIX) + "%r='12:00:00 AM'\t" +#elif defined(__APPLE__) || defined(__FreeBSD__) + "%r=''\t" +#else + "%r='12:00:00 '\t" +#endif + "%X='00:00:00'\t" + "%EX='00:00:00'\t" + "\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%H='23'\t" + "%OH='23'\t" + "%I='11'\t" + "%OI='11'\t" + "%M='31'\t" + "%OM='31'\t" + "%S='30,123'\t" + "%OS='30,123'\t" +#if defined(_AIX) + "%p='PM'\t" +#else + "%p=''\t" +#endif + "%R='23:31'\t" + "%T='23:31:30,123'\t" +#ifdef _WIN32 + "%r='23:31:30'\t" +#elif defined(_AIX) + "%r='11:31:30 PM'\t" +#elif defined(__APPLE__) || defined(__FreeBSD__) + "%r=''\t" +#elif defined(_WIN32) + "%r='23:31:30 '\t" +#else + "%r='11:31:30 '\t" +#endif + "%X='23:31:30'\t" + "%EX='23:31:30'\t" + "\n"), + lfmt, + std::chrono::sys_time<std::chrono::milliseconds>( + 1'234'567'890'123ms)); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use supplied locale 
(ja_JP). This locale has a different alternate.a +#if defined(__APPLE__) || defined(_AIX) || defined(_WIN32) || defined(__FreeBSD__) + check(loc, + SV("%H='00'\t" + "%OH='00'\t" + "%I='12'\t" + "%OI='12'\t" + "%M='00'\t" + "%OM='00'\t" + "%S='00'\t" + "%OS='00'\t" +# if defined(__APPLE__) + "%p='AM'\t" +# else + "%p='午前'\t" +# endif + "%R='00:00'\t" + "%T='00:00:00'\t" +# if defined(__APPLE__) || defined(__FreeBSD__) +# if defined(__APPLE__) + "%r='12:00:00 AM'\t" +# else + "%r='12:00:00 午前'\t" +# endif + "%X='00時00分00秒'\t" + "%EX='00時00分00秒'\t" +# elif defined(_WIN32) + "%r='0:00:00'\t" + "%X='0:00:00'\t" + "%EX='0:00:00'\t" +# else + "%r='午前12:00:00'\t" + "%X='00:00:00'\t" + "%EX='00:00:00'\t" +# endif + "\n"), + lfmt, + std::chrono::hh_mm_ss(0s)); + + check(loc, + SV("%H='23'\t" + "%OH='23'\t" + "%I='11'\t" + "%OI='11'\t" + "%M='31'\t" + "%OM='31'\t" + "%S='30.123'\t" + "%OS='30.123'\t" +# if defined(__APPLE__) + "%p='PM'\t" +# else + "%p='午後'\t" +# endif + "%R='23:31'\t" + "%T='23:31:30.123'\t" +# if defined(__APPLE__) || defined(__FreeBSD__) +# if defined(__APPLE__) + "%r='11:31:30 PM'\t" +# else + "%r='11:31:30 午後'\t" +# endif + "%X='23時31分30秒'\t" + "%EX='23時31分30秒'\t" +# elif defined(_WIN32) + "%r='23:31:30'\t" + "%X='23:31:30'\t" + "%EX='23:31:30'\t" +# else + "%r='午後11:31:30'\t" + "%X='23:31:30'\t" + "%EX='23:31:30'\t" +# endif + "\n"), + lfmt, + std::chrono::hh_mm_ss(23h + 31min + 30s + 123ms)); +#else // defined(__APPLE__) || defined(_AIX) || defined(_WIN32) || defined(__FreeBSD__) + check(loc, + SV("%H='00'\t" + "%OH='〇'\t" + "%I='12'\t" + "%OI='十二'\t" + "%M='00'\t" + "%OM='〇'\t" + "%S='00'\t" + "%OS='〇'\t" + "%p='午前'\t" + "%R='00:00'\t" + "%T='00:00:00'\t" + "%r='午前12時00分00秒'\t" + "%X='00時00分00秒'\t" + "%EX='00時00分00秒'\t" + "\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%H='23'\t" + "%OH='二十三'\t" + "%I='11'\t" + "%OI='十一'\t" + "%M='31'\t" + "%OM='三十一'\t" + 
"%S='30.123'\t" + "%OS='三十.123'\t" + "%p='午後'\t" + "%R='23:31'\t" + "%T='23:31:30.123'\t" + "%r='午後11時31分30秒'\t" + "%X='23時31分30秒'\t" + "%EX='23時31分30秒'\t" + "\n"), + lfmt, + std::chrono::sys_time<std::chrono::milliseconds>( + 1'234'567'890'123ms)); // 23:31:30 UTC on Friday, 13 February 2009 +#endif // defined(__APPLE__) || defined(_AIX) || defined(_WIN32) || defined(__FreeBSD__) + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_date_time() { + using namespace std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV("{:%%c='%c'%t%%Ec='%Ec'%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%c='%c'%t%%Ec='%Ec'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%c='Thu Jan 1 00:00:00 1970'\t%Ec='Thu Jan 1 00:00:00 1970'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(SV("%c='Fri Feb 13 23:31:30 2009'\t%Ec='Fri Feb 13 23:31:30 2009'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use the global locale (fr_FR) + check( +// https://sourceware.org/bugzilla/show_bug.cgi?id=24054 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='jeu. 01 janv. 1970 00:00:00 UTC'\t%Ec='jeu. 01 janv. 1970 00:00:00 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 + SV("%c='jeu. 01 janv. 1970 00:00:00 GMT'\t%Ec='jeu. 01 janv. 1970 00:00:00 GMT'\n"), +#elif defined(_AIX) + SV("%c=' 1 janvier 1970 à 00:00:00 UTC'\t%Ec=' 1 janvier 1970 à 00:00:00 UTC'\n"), +#elif defined(__APPLE__) + SV("%c='Jeu 1 jan 00:00:00 1970'\t%Ec='Jeu 1 jan 00:00:00 1970'\n"), +#elif defined(_WIN32) + SV("%c='01/01/1970 00:00:00'\t%Ec='01/01/1970 00:00:00'\n"), +#elif defined(__FreeBSD__) + SV("%c='jeu. 1 janv. 
00:00:00 1970'\t%Ec='jeu. 1 janv. 00:00:00 1970'\n"), +#else + SV("%c='jeu. 01 janv. 1970 00:00:00'\t%Ec='jeu. 01 janv. 1970 00:00:00'\n"), +#endif + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check( +// https://sourceware.org/bugzilla/show_bug.cgi?id=24054 +#if defined(__powerpc__) && defined(__linux__) + SV("%c='ven. 13 févr. 2009 23:31:30 UTC'\t%Ec='ven. 13 févr. 2009 23:31:30 UTC'\n"), +#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29 + SV("%c='ven. 13 févr. 2009 23:31:30 GMT'\t%Ec='ven. 13 févr. 2009 23:31:30 GMT'\n"), +#elif defined(_AIX) + SV("%c='13 février 2009 à 23:31:30 UTC'\t%Ec='13 février 2009 à 23:31:30 UTC'\n"), +#elif defined(__APPLE__) + SV("%c='Ven 13 fév 23:31:30 2009'\t%Ec='Ven 13 fév 23:31:30 2009'\n"), +#elif defined(_WIN32) + SV("%c='13/02/2009 23:31:30'\t%Ec='13/02/2009 23:31:30'\n"), +#elif defined(__FreeBSD__) + SV("%c='ven. 13 févr. 23:31:30 2009'\t%Ec='ven. 13 févr. 23:31:30 2009'\n"), +#else + SV("%c='ven. 13 févr. 2009 23:31:30'\t%Ec='ven. 13 févr. 2009 23:31:30'\n"), +#endif + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 + + // Use supplied locale (ja_JP). 
This locale has a different alternate.a +#if defined(__APPLE__) || defined(__FreeBSD__) + check(loc, + SV("%c='木 1/ 1 00:00:00 1970'\t%Ec='木 1/ 1 00:00:00 1970'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + check(loc, + SV("%c='金 2/13 23:31:30 2009'\t%Ec='金 2/13 23:31:30 2009'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#elif defined(_AIX) // defined(__APPLE__)|| defined(__FreeBSD__) + check(loc, + SV("%c='1970年01月 1日 00:00:00 UTC'\t%Ec='1970年01月 1日 00:00:00 UTC'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + check(loc, + SV("%c='2009年02月13日 23:31:30 UTC'\t%Ec='2009年02月13日 23:31:30 UTC'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#elif defined(_WIN32) // defined(__APPLE__)|| defined(__FreeBSD__) + check(loc, + SV("%c='1970/01/01 0:00:00'\t%Ec='1970/01/01 0:00:00'\n"), + lfmt, + std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 + check(loc, + SV("%c='2009/02/13 23:31:30'\t%Ec='2009/02/13 23:31:30'\n"), + lfmt, + std::chrono::sys_seconds(1'234'567'890s)); // 23:31:30 UTC on Friday, 13 February 2009 +#else // defined(__APPLE__)|| defined(__FreeBSD__) + check(loc, + SV("%c='1970年01月01日 00時00分00秒'\t%Ec='昭和45年01月01日 00時00分00秒'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + check(loc, + SV("%c='2009年02月13日 23時31分30秒'\t%Ec='平成21年02月13日 23時31分30秒'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009 +#endif // defined(__APPLE__)|| defined(__FreeBSD__) + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values_time_zone() { + using namespace 
std::literals::chrono_literals; + + constexpr std::basic_string_view<CharT> fmt = SV("{:%%z='%z'%t%%Ez='%Ez'%t%%Oz='%Oz'%t%%Z='%Z'%n}"); + constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%z='%z'%t%%Ez='%Ez'%t%%Oz='%Oz'%t%%Z='%Z'%n}"); + + const std::locale loc(LOCALE_ja_JP_UTF_8); + std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); + + // Non localized output using C-locale + check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), + fmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + // Use the global locale (fr_FR) + check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + // Use supplied locale (ja_JP). + check(loc, + SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), + lfmt, + std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970 + + std::locale::global(std::locale::classic()); +} + +template <class CharT> +static void test_valid_values() { + test_valid_values_year<CharT>(); + test_valid_values_month<CharT>(); + test_valid_values_day<CharT>(); + test_valid_values_weekday<CharT>(); + test_valid_values_day_of_year<CharT>(); + test_valid_values_week<CharT>(); + test_valid_values_iso_8601_week<CharT>(); + test_valid_values_date<CharT>(); + test_valid_values_time<CharT>(); + test_valid_values_date_time<CharT>(); + test_valid_values_time_zone<CharT>(); +} + +template <class CharT> +static void test() { + test_no_chrono_specs<CharT>(); + test_valid_values<CharT>(); + + check_invalid_types<CharT>( + {SV("a"), SV("A"), SV("b"), SV("B"), SV("c"), SV("C"), SV("d"), SV("D"), SV("e"), SV("F"), SV("g"), + SV("G"), SV("h"), SV("H"), SV("I"), SV("j"), SV("m"), SV("M"), SV("p"), SV("r"), SV("R"), SV("S"), + SV("T"), SV("u"), SV("U"), SV("V"), SV("w"), SV("W"), SV("x"), SV("X"), SV("y"), SV("Y"), SV("z"), + SV("Z"), SV("Ec"), SV("EC"), 
SV("Ex"), SV("EX"), SV("Ey"), SV("EY"), SV("Ez"), SV("Od"), SV("Oe"), SV("OH"), + SV("OI"), SV("Om"), SV("OM"), SV("OS"), SV("Ou"), SV("OU"), SV("OV"), SV("Ow"), SV("OW"), SV("Oy"), SV("Oz")}, + std::chrono::zoned_time{}); +} + +int main(int, char**) { + test<char>(); + +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test<wchar_t>(); +#endif + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.zonedtime/test_offset_time_zone.h b/libcxx/test/std/time/time.zone/time.zone.zonedtime/test_offset_time_zone.h index c137049..e9262c5 100644 --- a/libcxx/test/std/time/time.zone/time.zone.zonedtime/test_offset_time_zone.h +++ b/libcxx/test/std/time/time.zone/time.zone.zonedtime/test_offset_time_zone.h @@ -13,6 +13,7 @@ #include <cassert> #include <charconv> #include <chrono> +#include <format> #include <string_view> #include <type_traits> @@ -42,6 +43,8 @@ public: offset_time_zone* operator->() { return this; } + const offset_time_zone* operator->() const { return this; } + template <class Duration> std::chrono::sys_time<std::common_type_t<Duration, std::chrono::seconds>> to_sys(const std::chrono::local_time<Duration>& local) const { @@ -49,6 +52,22 @@ public: local.time_since_epoch() + offset_}; } + template <class Duration> + std::chrono::local_time<std::common_type_t<Duration, std::chrono::seconds>> + to_local(const std::chrono::sys_time<Duration>& sys) const { + return std::chrono::local_time<std::common_type_t<Duration, std::chrono::seconds>>{ + sys.time_since_epoch() - offset_}; + } + + template <class Duration> + std::chrono::sys_info get_info(const std::chrono::sys_time<Duration>&) const { + return {std::chrono::sys_seconds::min(), + std::chrono::sys_seconds::max(), + offset_, + std::chrono::minutes{0}, + std::format("{:+03d}s", offset_.count())}; + } + private: std::chrono::seconds offset_; }; diff --git a/libcxx/test/std/time/time.zone/time.zone.zonedtime/time.zone.zonedtime.nonmembers/ostream.pass.cpp 
b/libcxx/test/std/time/time.zone/time.zone.zonedtime/time.zone.zonedtime.nonmembers/ostream.pass.cpp new file mode 100644 index 0000000..06131d6 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.zonedtime/time.zone.zonedtime.nonmembers/ostream.pass.cpp @@ -0,0 +1,351 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// TODO FMT This test should not require std::to_chars(floating-point) +// XFAIL: availability-fp_to_chars-missing + +// XFAIL: libcpp-has-no-experimental-tzdb + +// REQUIRES: locale.fr_FR.UTF-8 +// REQUIRES: locale.ja_JP.UTF-8 + +// <chrono> + +// template<class charT, class traits, class Duration, class TimeZonePtr> +// basic_ostream<charT, traits>& +// operator<<(basic_ostream<charT, traits>& os, +// const zoned_time<Duration, TimeZonePtr>& t); + +#include <chrono> +#include <cassert> +#include <sstream> + +#include "assert_macros.h" +#include "concat_macros.h" +#include "make_string.h" +#include "platform_support.h" // locale name macros +#include "test_macros.h" +#include "../test_offset_time_zone.h" + +#define SV(S) MAKE_STRING_VIEW(CharT, S) + +#define TEST_EQUAL(OUT, EXPECTED) \ + TEST_REQUIRE(OUT == EXPECTED, \ + TEST_WRITE_CONCATENATED( \ + "\nExpression ", #OUT, "\nExpected output ", EXPECTED, "\nActual output ", OUT, '\n')); + +template <class CharT, class Duration, class TimeZonePtr> +static std::basic_string<CharT> stream_c_locale(std::chrono::zoned_time<Duration, TimeZonePtr> time_point) { + std::basic_stringstream<CharT> sstr; + sstr << time_point; + return sstr.str(); +} + +template <class CharT, 
class Duration, class TimeZonePtr> +static std::basic_string<CharT> stream_fr_FR_locale(std::chrono::zoned_time<Duration, TimeZonePtr> time_point) { + std::basic_stringstream<CharT> sstr; + const std::locale locale(LOCALE_fr_FR_UTF_8); + sstr.imbue(locale); + sstr << time_point; + return sstr.str(); +} + +template <class CharT, class Duration, class TimeZonePtr> +static std::basic_string<CharT> stream_ja_JP_locale(std::chrono::zoned_time<Duration, TimeZonePtr> time_point) { + std::basic_stringstream<CharT> sstr; + const std::locale locale(LOCALE_ja_JP_UTF_8); + sstr.imbue(locale); + sstr << time_point; + return sstr.str(); +} + +template <class CharT> +static void test_c() { + using namespace std::literals::chrono_literals; + + { // Different durations + TEST_EQUAL(stream_c_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::nanoseconds>{42ns})), + SV("1970-01-01 01:00:00.000000042 +01")); + + TEST_EQUAL(stream_c_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::microseconds>{42us})), + SV("1970-01-01 01:00:00.000042 +01")); + + TEST_EQUAL(stream_c_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::milliseconds>{42ms})), + SV("1970-01-01 01:00:00.042 +01")); + + TEST_EQUAL( + stream_c_locale<CharT>(std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::seconds>{42s})), + SV("1970-01-01 01:00:42 +01")); + + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + "Etc/GMT-1", std::chrono::sys_time<std::chrono::days>{std::chrono::days{42}})), + SV("1970-02-12 01:00:00 +01")); + + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + "Etc/GMT-1", std::chrono::sys_time<std::chrono::weeks>{std::chrono::weeks{42}})), + SV("1970-10-22 01:00:00 +01")); + } + + { // Daylight saving time switches + // Pick an historic date where it's well known what the time zone rules were. 
+ // This makes it unlikely updates to the database change these rules. + + // Z Europe/Berlin 0:53:28 - LMT 1893 Ap + // ... + // 1 DE CE%sT 1980 + // 1 E CE%sT + // + // ... + // R E 1979 1995 - S lastSu 1u 0 - + // R E 1981 ma - Mar lastSu 1u 1 S + + // Pick an historic date where it's well known what the time zone rules were. + // This makes it unlikely updates to the database change these rules. + + // Start of daylight saving time + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 0h + 59min + 59s)), + SV("1986-03-30 01:59:59 CET")); + + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 1h)), + SV("1986-03-30 03:00:00 CEST")); + + // End of daylight saving time + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 0h + 59min + 59s)), + SV("1986-09-28 02:59:59 CEST")); + + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h)), + SV("1986-09-28 02:00:00 CET")); + + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h + 59min + 59s)), + SV("1986-09-28 02:59:59 CET")); + + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h)), + SV("1986-09-28 03:00:00 CET")); + } + + { // offset pointer + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{}, std::chrono::sys_seconds{})), + SV("1970-01-01 00:00:00 +00s")); + + TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{"42"}, std::chrono::sys_seconds{})), + SV("1969-12-31 23:59:18 +42s")); + + 
TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{"-42"}, std::chrono::sys_seconds{})), + SV("1970-01-01 00:00:42 -42s")); + } +} + +template <class CharT> +static void test_fr_FR() { + using namespace std::literals::chrono_literals; + + { // Different durations + + TEST_EQUAL(stream_fr_FR_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::nanoseconds>{42ns})), + SV("1970-01-01 01:00:00,000000042 +01")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::microseconds>{42us})), + SV("1970-01-01 01:00:00,000042 +01")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::milliseconds>{42ms})), + SV("1970-01-01 01:00:00,042 +01")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::seconds>{42s})), + SV("1970-01-01 01:00:42 +01")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + "Etc/GMT-1", std::chrono::sys_time<std::chrono::days>{std::chrono::days{42}})), + SV("1970-02-12 01:00:00 +01")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + "Etc/GMT-1", std::chrono::sys_time<std::chrono::weeks>{std::chrono::weeks{42}})), + SV("1970-10-22 01:00:00 +01")); + } + + { // Daylight saving time switches + // Pick an historic date where it's well known what the time zone rules were. + // This makes it unlikely updates to the database change these rules. + + // Z Europe/Berlin 0:53:28 - LMT 1893 Ap + // ... + // 1 DE CE%sT 1980 + // 1 E CE%sT + // + // ... + // R E 1979 1995 - S lastSu 1u 0 - + // R E 1981 ma - Mar lastSu 1u 1 S + + // Pick an historic date where it's well known what the time zone rules were. + // This makes it unlikely updates to the database change these rules. 
+ + // Start of daylight saving time + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 0h + 59min + 59s)), + SV("1986-03-30 01:59:59 CET")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 1h)), + SV("1986-03-30 03:00:00 CEST")); + + // End of daylight saving time + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 0h + 59min + 59s)), + SV("1986-09-28 02:59:59 CEST")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h)), + SV("1986-09-28 02:00:00 CET")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h + 59min + 59s)), + SV("1986-09-28 02:59:59 CET")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h)), + SV("1986-09-28 03:00:00 CET")); + } + + { // offset pointer + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{}, std::chrono::sys_seconds{})), + SV("1970-01-01 00:00:00 +00s")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{"42"}, std::chrono::sys_seconds{})), + SV("1969-12-31 23:59:18 +42s")); + + TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{"-42"}, std::chrono::sys_seconds{})), + SV("1970-01-01 00:00:42 -42s")); + } +} + +template <class CharT> +static void test_ja_JP() { + using namespace std::literals::chrono_literals; + + { // Different durations + + TEST_EQUAL(stream_ja_JP_locale<CharT>( + 
std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::nanoseconds>{42ns})), + SV("1970-01-01 01:00:00.000000042 +01")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::microseconds>{42us})), + SV("1970-01-01 01:00:00.000042 +01")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::milliseconds>{42ms})), + SV("1970-01-01 01:00:00.042 +01")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>( + std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::seconds>{42s})), + SV("1970-01-01 01:00:42 +01")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + "Etc/GMT-1", std::chrono::sys_time<std::chrono::days>{std::chrono::days{42}})), + SV("1970-02-12 01:00:00 +01")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + "Etc/GMT-1", std::chrono::sys_time<std::chrono::weeks>{std::chrono::weeks{42}})), + SV("1970-10-22 01:00:00 +01")); + } + + { // Daylight saving time switches + // Pick an historic date where it's well known what the time zone rules were. + // This makes it unlikely updates to the database change these rules. + + // Z Europe/Berlin 0:53:28 - LMT 1893 Ap + // ... + // 1 DE CE%sT 1980 + // 1 E CE%sT + // + // ... + // R E 1979 1995 - S lastSu 1u 0 - + // R E 1981 ma - Mar lastSu 1u 1 S + + // Pick an historic date where it's well known what the time zone rules were. + // This makes it unlikely updates to the database change these rules. 
+ + // Start of daylight saving time + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 0h + 59min + 59s)), + SV("1986-03-30 01:59:59 CET")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 1h)), + SV("1986-03-30 03:00:00 CEST")); + + // End of daylight saving time + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 0h + 59min + 59s)), + SV("1986-09-28 02:59:59 CEST")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h)), + SV("1986-09-28 02:00:00 CET")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h + 59min + 59s)), + SV("1986-09-28 02:59:59 CET")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h)), + SV("1986-09-28 03:00:00 CET")); + } + + { // offset pointer + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{}, std::chrono::sys_seconds{})), + SV("1970-01-01 00:00:00 +00s")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{"42"}, std::chrono::sys_seconds{})), + SV("1969-12-31 23:59:18 +42s")); + + TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time( + offset_time_zone<offset_time_zone_flags::none>{"-42"}, std::chrono::sys_seconds{})), + SV("1970-01-01 00:00:42 -42s")); + } +} + +template <class CharT> +static void test() { + test_c<CharT>(); + test_fr_FR<CharT>(); + test_ja_JP<CharT>(); +} + +int main(int, char**) { + test<char>(); + +#ifndef 
TEST_HAS_NO_WIDE_CHARACTERS + test<wchar_t>(); +#endif + + return 0; +} diff --git a/libcxx/utils/libcxx/test/modules.py b/libcxx/utils/libcxx/test/modules.py index aab7651..b7758dc 100644 --- a/libcxx/utils/libcxx/test/modules.py +++ b/libcxx/utils/libcxx/test/modules.py @@ -76,6 +76,13 @@ ExtraDeclarations = dict() # This declaration is in the ostream header. ExtraDeclarations["system_error"] = ["std::operator<<"] +# TODO MODULES avoid this work-around +# This is a work-around for the special math functions. They are declared in +# __math/special_functions.h. Adding this as an ExtraHeader works for the std +# module. However these functions are special; they are not available in the +# global namespace. +ExtraDeclarations["cmath"] = ["std::hermite", "std::hermitef", "std::hermitel"] + ### ExtraHeader # Adds extra headers file to scan diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp index 14f39ed..c8c02ab 100644 --- a/lld/ELF/ScriptLexer.cpp +++ b/lld/ELF/ScriptLexer.cpp @@ -272,20 +272,10 @@ StringRef ScriptLexer::peek() { return tok; } -StringRef ScriptLexer::peek2() { - skip(); - StringRef tok = next(); - if (errorCount()) - return ""; - pos = pos - 2; - return tok; -} - bool ScriptLexer::consume(StringRef tok) { - if (peek() == tok) { - skip(); + if (next() == tok) return true; - } + --pos; return false; } diff --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h index 7919e49..d539381 100644 --- a/lld/ELF/ScriptLexer.h +++ b/lld/ELF/ScriptLexer.h @@ -26,7 +26,6 @@ public: bool atEOF(); StringRef next(); StringRef peek(); - StringRef peek2(); void skip(); bool consume(StringRef tok); void expect(StringRef expect); diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 47a94c2..49aa7e6 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -92,7 +92,7 @@ private: SymbolAssignment *readSymbolAssignment(StringRef name); ByteCommand *readByteCommand(StringRef tok); std::array<uint8_t, 4> readFill(); - bool 
readSectionDirective(OutputSection *cmd, StringRef tok1, StringRef tok2); + bool readSectionDirective(OutputSection *cmd, StringRef tok); void readSectionAddressType(OutputSection *cmd); OutputDesc *readOverlaySectionDescription(); OutputDesc *readOutputSectionDescription(StringRef outSec); @@ -873,16 +873,11 @@ constexpr std::pair<const char *, unsigned> typeMap[] = { // Tries to read the special directive for an output section definition which // can be one of following: "(NOLOAD)", "(COPY)", "(INFO)", "(OVERLAY)", and // "(TYPE=<value>)". -// Tok1 and Tok2 are next 2 tokens peeked. See comment for -// readSectionAddressType below. -bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, StringRef tok2) { - if (tok1 != "(") - return false; - if (tok2 != "NOLOAD" && tok2 != "COPY" && tok2 != "INFO" && - tok2 != "OVERLAY" && tok2 != "TYPE") +bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok) { + if (tok != "NOLOAD" && tok != "COPY" && tok != "INFO" && tok != "OVERLAY" && + tok != "TYPE") return false; - expect("("); if (consume("NOLOAD")) { cmd->type = SHT_NOBITS; cmd->typeIsSet = true; @@ -921,16 +916,23 @@ bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, Stri // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html void ScriptParser::readSectionAddressType(OutputSection *cmd) { - // Temporarily set inExpr to support TYPE=<value> without spaces. - bool saved = std::exchange(inExpr, true); - bool isDirective = readSectionDirective(cmd, peek(), peek2()); - inExpr = saved; - if (isDirective) - return; + if (consume("(")) { + // Temporarily set inExpr to support TYPE=<value> without spaces. 
+ SaveAndRestore saved(inExpr, true); + if (readSectionDirective(cmd, peek())) + return; + cmd->addrExpr = readExpr(); + expect(")"); + } else { + cmd->addrExpr = readExpr(); + } - cmd->addrExpr = readExpr(); - if (peek() == "(" && !readSectionDirective(cmd, "(", peek2())) - setError("unknown section directive: " + peek2()); + if (consume("(")) { + SaveAndRestore saved(inExpr, true); + StringRef tok = peek(); + if (!readSectionDirective(cmd, tok)) + setError("unknown section directive: " + tok); + } } static Expr checkAlignment(Expr e, std::string &loc) { @@ -1180,10 +1182,8 @@ SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) { Expr ScriptParser::readExpr() { // Our lexer is context-aware. Set the in-expression bit so that // they apply different tokenization rules. - bool orig = inExpr; - inExpr = true; + SaveAndRestore saved(inExpr, true); Expr e = readExpr1(readPrimary(), 0); - inExpr = orig; return e; } @@ -1249,9 +1249,9 @@ Expr ScriptParser::readExpr1(Expr lhs, int minPrec) { StringRef op1 = peek(); if (precedence(op1) < minPrec) break; - if (consume("?")) - return readTernary(lhs); skip(); + if (op1 == "?") + return readTernary(lhs); Expr rhs = readPrimary(); // Evaluate the remaining part of the expression first if the diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 4a6f996..9c056f4 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -1303,12 +1303,16 @@ void ObjcCategoryMerger::eraseMergedCategories() { continue; eraseISec(catInfo.catBodyIsec); - // We can't erase 'catLayout.nameOffset' for Swift categories because the - // name will be referenced for generating relative offsets - // See usages of 'l_.str.11.SimpleClass' in objc-category-merging-swift.s + + // We can't erase 'catLayout.nameOffset' for either Swift or ObjC + // categories because the name will sometimes also be used for other + // purposes. 
+ // For Swift, see usages of 'l_.str.11.SimpleClass' in + // objc-category-merging-swift.s + // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in + // objc-category-merging-erase-objc-name-test.s // TODO: handle the above in a smarter way - if (catInfo.sourceLanguage != SourceLanguage::Swift) - tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset); + tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.instanceMethodsOffset); tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, diff --git a/lld/test/ELF/defsym.s b/lld/test/ELF/defsym.s index 0168ce8..fed937f 100644 --- a/lld/test/ELF/defsym.s +++ b/lld/test/ELF/defsym.s @@ -11,8 +11,7 @@ ## Check we are reporting the error correctly and don't crash ## when handling the second --defsym. -# RUN: not ld.lld -o /dev/null %t.o --defsym ERR+ \ -# --defsym foo2=foo1 2>&1 | FileCheck %s --check-prefix=ERR +# RUN: not ld.lld -o /dev/null %t.o --defsym ERR+ --defsym foo2=foo1 2>&1 | FileCheck %s --check-prefix=ERR # ERR: error: --defsym: syntax error: ERR+ # CHECK-DAG: 0000000000000123 0 NOTYPE GLOBAL DEFAULT ABS foo1 @@ -27,7 +26,7 @@ # RUN: ld.lld -o %t %t.o --defsym=foo2=1 # RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=ABS -# ABS: 0000000000000123 0 NOTYPE GLOBAL DEFAULT ABS foo2 +# ABS: 0000000000000001 0 NOTYPE GLOBAL DEFAULT ABS foo2 # RUN: ld.lld -o %t %t.o --defsym=foo2=foo1+5 # RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=EXPR diff --git a/lld/test/ELF/linkerscript/custom-section-type.s b/lld/test/ELF/linkerscript/custom-section-type.s index 8ca0a4d..2add3a5 100644 --- a/lld/test/ELF/linkerscript/custom-section-type.s +++ b/lld/test/ELF/linkerscript/custom-section-type.s @@ -67,7 +67,7 @@ SECTIONS { nobits ( TYPE=SHT_NOBITS) : { BYTE(8) } init_array (TYPE=SHT_INIT_ARRAY ) : { QUAD(myinit) } fini_array (TYPE=SHT_FINI_ARRAY) : { QUAD(15) } - preinit_array (TYPE=SHT_PREINIT_ARRAY) : { QUAD(16) } + preinit_array . 
(TYPE=SHT_PREINIT_ARRAY) : { QUAD(16) } group (TYPE=17) : { LONG(17) } expr (TYPE=0x41+1) : { BYTE(0x42) *(expr) } } diff --git a/lld/test/MachO/objc-category-merging-erase-objc-name-test.s b/lld/test/MachO/objc-category-merging-erase-objc-name-test.s new file mode 100644 index 0000000..01c5c4f --- /dev/null +++ b/lld/test/MachO/objc-category-merging-erase-objc-name-test.s @@ -0,0 +1,306 @@ +; REQUIRES: aarch64 + +; Here we test that if we defined a protocol MyTestProtocol and also a category MyTestProtocol +; then when merging the category into the base class (and deleting the category), we don't +; delete the 'MyTestProtocol' name + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o erase-objc-name.o %s +; RUN: %lld -arch arm64 -dylib -o erase-objc-name.dylib erase-objc-name.o -objc_category_merging +; RUN: llvm-objdump --objc-meta-data --macho erase-objc-name.dylib | FileCheck %s --check-prefixes=MERGE_CATS + +; === Check merge categories enabled === +; Check that the original categories are not there +; MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +; MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +; Check that we get the expected output - most importantly that the protocol is named `MyTestProtocol` +; MERGE_CATS: Contents of (__DATA_CONST,__objc_classlist) section +; MERGE_CATS-NEXT: _OBJC_CLASS_$_MyBaseClass +; MERGE_CATS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +; MERGE_CATS-NEXT: superclass {{.*}} +; MERGE_CATS-NEXT: cache {{.*}} +; MERGE_CATS-NEXT: vtable {{.*}} +; MERGE_CATS-NEXT: data {{.*}} (struct class_ro_t *) +; MERGE_CATS-NEXT: flags {{.*}} RO_ROOT +; MERGE_CATS-NEXT: instanceStart 0 +; MERGE_CATS-NEXT: instanceSize 0 +; MERGE_CATS-NEXT: reserved {{.*}} +; MERGE_CATS-NEXT: ivarLayout {{.*}} +; MERGE_CATS-NEXT: name {{.*}} MyBaseClass +; MERGE_CATS-NEXT: baseMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: entsize 24 +; MERGE_CATS-NEXT: count 2 +; MERGE_CATS-NEXT: name {{.*}} getValue +; 
MERGE_CATS-NEXT: types {{.*}} i16@0:8 +; MERGE_CATS-NEXT: imp -[MyBaseClass(MyTestProtocol) getValue] +; MERGE_CATS-NEXT: name {{.*}} baseInstanceMethod +; MERGE_CATS-NEXT: types {{.*}} v16@0:8 +; MERGE_CATS-NEXT: imp -[MyBaseClass baseInstanceMethod] +; MERGE_CATS-NEXT: baseProtocols {{.*}} +; MERGE_CATS-NEXT: count 1 +; MERGE_CATS-NEXT: list[0] {{.*}} (struct protocol_t *) +; MERGE_CATS-NEXT: isa {{.*}} +; MERGE_CATS-NEXT: name {{.*}} MyTestProtocol +; MERGE_CATS-NEXT: protocols {{.*}} +; MERGE_CATS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: entsize 24 +; MERGE_CATS-NEXT: count 1 +; MERGE_CATS-NEXT: name {{.*}} getValue +; MERGE_CATS-NEXT: types {{.*}} i16@0:8 +; MERGE_CATS-NEXT: imp {{.*}} +; MERGE_CATS-NEXT: classMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: optionalInstanceMethods {{.*}} +; MERGE_CATS-NEXT: optionalClassMethods {{.*}} +; MERGE_CATS-NEXT: instanceProperties {{.*}} +; MERGE_CATS-NEXT: ivars {{.*}} +; MERGE_CATS-NEXT: weakIvarLayout {{.*}} +; MERGE_CATS-NEXT: baseProperties {{.*}} +; MERGE_CATS-NEXT: Meta Class +; MERGE_CATS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +; MERGE_CATS-NEXT: superclass {{.*}} _OBJC_CLASS_$_MyBaseClass +; MERGE_CATS-NEXT: cache {{.*}} +; MERGE_CATS-NEXT: vtable {{.*}} +; MERGE_CATS-NEXT: data {{.*}} (struct class_ro_t *) +; MERGE_CATS-NEXT: flags {{.*}} RO_META RO_ROOT +; MERGE_CATS-NEXT: instanceStart 40 +; MERGE_CATS-NEXT: instanceSize 40 +; MERGE_CATS-NEXT: reserved {{.*}} +; MERGE_CATS-NEXT: ivarLayout {{.*}} +; MERGE_CATS-NEXT: name {{.*}} MyBaseClass +; MERGE_CATS-NEXT: baseMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: baseProtocols {{.*}} +; MERGE_CATS-NEXT: count 1 +; MERGE_CATS-NEXT: list[0] {{.*}} (struct protocol_t *) +; MERGE_CATS-NEXT: isa {{.*}} +; MERGE_CATS-NEXT: name {{.*}} MyTestProtocol +; MERGE_CATS-NEXT: protocols {{.*}} +; MERGE_CATS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: entsize 24 +; 
MERGE_CATS-NEXT: count 1 +; MERGE_CATS-NEXT: name {{.*}} getValue +; MERGE_CATS-NEXT: types {{.*}} i16@0:8 +; MERGE_CATS-NEXT: imp {{.*}} +; MERGE_CATS-NEXT: classMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: optionalInstanceMethods {{.*}} +; MERGE_CATS-NEXT: optionalClassMethods {{.*}} +; MERGE_CATS-NEXT: instanceProperties {{.*}} +; MERGE_CATS-NEXT: ivars {{.*}} +; MERGE_CATS-NEXT: weakIvarLayout {{.*}} +; MERGE_CATS-NEXT: baseProperties {{.*}} +; MERGE_CATS-NEXT: Contents of (__DATA_CONST,__objc_protolist) section +; MERGE_CATS-NEXT: {{.*}} {{.*}} __OBJC_PROTOCOL_$_MyTestProtocol +; MERGE_CATS-NEXT: Contents of (__DATA_CONST,__objc_imageinfo) section +; MERGE_CATS-NEXT: version 0 +; MERGE_CATS-NEXT: flags {{.*}} OBJC_IMAGE_HAS_CATEGORY_CLASS_PROPERTIES + + +; ================== repro.sh ==================== +; # Write the Objective-C code to a file +; cat << EOF > MyClass.m +; @protocol MyTestProtocol +; - (int)getValue; +; @end +; +; __attribute__((objc_root_class)) +; @interface MyBaseClass +; - (void)baseInstanceMethod; +; @end +; +; @implementation MyBaseClass +; - (void)baseInstanceMethod {} +; @end +; +; @interface MyBaseClass (MyTestProtocol) <MyTestProtocol> +; @end +; +; @implementation MyBaseClass (MyTestProtocol) +; +; - (int)getValue { +; return 0x30; +; } +; +; @end +; EOF +; +; # Compile the Objective-C file to assembly +; xcrun clang -S -arch arm64 MyClass.m -o MyClass.s +; ============================================== + + + .section __TEXT,__text,regular,pure_instructions + .p2align 2 ; -- Begin function -[MyBaseClass baseInstanceMethod] +"-[MyBaseClass baseInstanceMethod]": ; @"\01-[MyBaseClass baseInstanceMethod]" + .cfi_startproc +; %bb.0: + sub sp, sp, #16 + .cfi_def_cfa_offset 16 + str x0, [sp, #8] + str x1, [sp] + add sp, sp, #16 + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass(MyTestProtocol) getValue] +"-[MyBaseClass(MyTestProtocol) getValue]": ; @"\01-[MyBaseClass(MyTestProtocol) 
getValue]" + .cfi_startproc +; %bb.0: + sub sp, sp, #16 + .cfi_def_cfa_offset 16 + str x0, [sp, #8] + str x1, [sp] + mov w0, #48 ; =0x30 + add sp, sp, #16 + ret + .cfi_endproc + ; -- End function + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_MyBaseClass ; @"OBJC_CLASS_$_MyBaseClass" + .p2align 3, 0x0 +_OBJC_CLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad 0 + .quad __objc_empty_cache + .quad 0 + .quad __OBJC_CLASS_RO_$_MyBaseClass + .globl _OBJC_METACLASS_$_MyBaseClass ; @"OBJC_METACLASS_$_MyBaseClass" + .p2align 3, 0x0 +_OBJC_METACLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad _OBJC_CLASS_$_MyBaseClass + .quad __objc_empty_cache + .quad 0 + .quad __OBJC_METACLASS_RO_$_MyBaseClass + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_ + .asciz "MyBaseClass" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_MyBaseClass" +__OBJC_METACLASS_RO_$_MyBaseClass: + .long 131 ; 0x83 + .long 40 ; 0x28 + .long 40 ; 0x28 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_ + .asciz "baseInstanceMethod" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_ + .asciz "v16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_MyBaseClass" +__OBJC_$_INSTANCE_METHODS_MyBaseClass: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass baseInstanceMethod]" + .p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_MyBaseClass" +__OBJC_CLASS_RO_$_MyBaseClass: + .long 130 ; 0x82 + .long 0 ; 0x0 + .long 0 ; 0x0 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_.1: ; 
@OBJC_CLASS_NAME_.1 + .asciz "MyTestProtocol" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.2: ; @OBJC_METH_VAR_NAME_.2 + .asciz "getValue" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_.3: ; @OBJC_METH_VAR_TYPE_.3 + .asciz "i16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol" +__OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_.3 + .quad "-[MyBaseClass(MyTestProtocol) getValue]" + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol" +__OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_.3 + .quad 0 + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol" +__OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol: + .quad l_OBJC_METH_VAR_TYPE_.3 + .private_extern __OBJC_PROTOCOL_$_MyTestProtocol ; @"_OBJC_PROTOCOL_$_MyTestProtocol" + .section __DATA,__data + .globl __OBJC_PROTOCOL_$_MyTestProtocol + .weak_definition __OBJC_PROTOCOL_$_MyTestProtocol + .p2align 3, 0x0 +__OBJC_PROTOCOL_$_MyTestProtocol: + .quad 0 + .quad l_OBJC_CLASS_NAME_.1 + .quad 0 + .quad __OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .long 96 ; 0x60 + .long 0 ; 0x0 + .quad __OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol + .quad 0 + .quad 0 + .private_extern __OBJC_LABEL_PROTOCOL_$_MyTestProtocol ; @"_OBJC_LABEL_PROTOCOL_$_MyTestProtocol" + .section __DATA,__objc_protolist,coalesced,no_dead_strip + .globl __OBJC_LABEL_PROTOCOL_$_MyTestProtocol + .weak_definition __OBJC_LABEL_PROTOCOL_$_MyTestProtocol + .p2align 3, 0x0 +__OBJC_LABEL_PROTOCOL_$_MyTestProtocol: + .quad __OBJC_PROTOCOL_$_MyTestProtocol + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol" 
+__OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol: + .quad 1 ; 0x1 + .quad __OBJC_PROTOCOL_$_MyTestProtocol + .quad 0 + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol" +__OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol: + .quad l_OBJC_CLASS_NAME_.1 + .quad _OBJC_CLASS_$_MyBaseClass + .quad __OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol + .quad 0 + .quad __OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol + .quad 0 + .quad 0 + .long 64 ; 0x40 + .space 4 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$" +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_MyBaseClass + .section __DATA,__objc_catlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CATEGORY_$" +l_OBJC_LABEL_CATEGORY_$: + .quad __OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol + .no_dead_strip __OBJC_PROTOCOL_$_MyTestProtocol + .no_dead_strip __OBJC_LABEL_PROTOCOL_$_MyTestProtocol + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 + +__objc_empty_cache: +_$sBOWV: + .quad 0 + +.subsections_via_symbols diff --git a/lld/test/MachO/reproduce-thin-archive-objc.s b/lld/test/MachO/reproduce-thin-archive-objc.s index c5fe42f..8159f03 100644 --- a/lld/test/MachO/reproduce-thin-archive-objc.s +++ b/lld/test/MachO/reproduce-thin-archive-objc.s @@ -4,20 +4,19 @@ ## during linking. However, we need to iterate over all members for -ObjC, check that we don't ## crash when we encounter a missing member. 
-# RUN: rm -rf %t; mkdir %t -# RUN: sed s/SYM/_main/ %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o -# RUN: sed s/SYM/_unused/ %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/unused.o +# RUN: rm -rf %t && mkdir %t && cd %t +# RUN: sed s/SYM/_main/ %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o main.o +# RUN: sed s/SYM/_unused/ %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o unused.o -# RUN: cd %t; llvm-ar rcsT unused.a unused.o; rm unused.o +# RUN: llvm-ar rcsT unused.a unused.o; rm unused.o ## FIXME: Absolute paths don't end up relativized in the repro file. # RUN: %no-fatal-warnings-lld %t/main.o %t/unused.a -ObjC -o /dev/null 2>&1 \ # RUN: | FileCheck %s --check-prefix=WARN -# RUN: %lld %t/main.o %t/unused.a -ObjC --no-warn-thin-archive-missing-members -o /dev/null \ -# RUN: | FileCheck %s --implicit-check-not 'warning' --allow-empty +# RUN: %lld main.o unused.a -ObjC --no-warn-thin-archive-missing-members 2>&1 | count 0 -# WARN: ld64.lld: warning: {{.*}}unused.a: -ObjC failed to open archive member: 'unused.o' +# WARN: warning: {{.*}}unused.a: -ObjC failed to open archive member: 'unused.o' .text .globl SYM diff --git a/lldb/test/API/functionalities/process_save_core/TestProcessSaveCore.py b/lldb/test/API/functionalities/process_save_core/TestProcessSaveCore.py index 07d06bd..8573d15 100644 --- a/lldb/test/API/functionalities/process_save_core/TestProcessSaveCore.py +++ b/lldb/test/API/functionalities/process_save_core/TestProcessSaveCore.py @@ -20,8 +20,8 @@ class ProcessSaveCoreTestCase(TestBase): target = self.dbg.CreateTarget(exe) process = target.LaunchSimple(None, None, self.get_process_working_directory()) self.assertNotEqual(process.GetState(), lldb.eStateStopped) - options = SBSaveCoreOptions() - options.SetOutputFile(SBFileSpec(core)) + options = lldb.SBSaveCoreOptions() + options.SetOutputFile(lldb.SBFileSpec(core)) error = process.SaveCore(core) self.assertTrue(error.Fail()) diff --git 
a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index 96520c7..dfeb765 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -265,6 +265,11 @@ if is_configured("lldb_libs_dir"): if is_configured("lldb_framework_dir"): dotest_cmd += ["--framework", config.lldb_framework_dir] +# Facebook T92898286 +if is_configured("llvm_test_bolt"): + dotest_cmd += ["-E", '"--post-link-optimize"'] +# End Facebook T92898286 + if ( "lldb-repro-capture" in config.available_features or "lldb-repro-replay" in config.available_features diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 8b2d09a..602f457 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -1,5 +1,9 @@ @LIT_SITE_CFG_IN_HEADER@ +#Facebook T92898286 +import lit.util +#End Facebook T92898286 + config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") @@ -39,6 +43,10 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@" config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api") +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + # Plugins lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@' if lldb_build_intel_pt == '1': diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py index 255955f..7b7be06 100644 --- a/lldb/test/Shell/helper/toolchain.py +++ b/lldb/test/Shell/helper/toolchain.py @@ -165,6 +165,11 @@ def use_support_substitutions(config): if config.cmake_sysroot: host_flags += ["--sysroot={}".format(config.cmake_sysroot)] + # Facebook T92898286 + if config.llvm_test_bolt: + host_flags += ["--post-link-optimize"] + # End Facebook T92898286 + host_flags = " ".join(host_flags) 
config.substitutions.append(("%clang_host", "%clang " + host_flags)) config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags)) diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index b69e7bc..fe83237 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ -1,5 +1,10 @@ @LIT_SITE_CFG_IN_HEADER@ +#Facebook T92898286 +import lit.util +#End Facebook T92898286 + + config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") @@ -31,6 +36,10 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell") +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 1261896..a08b477 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -709,6 +709,10 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH option(LLVM_USE_SPLIT_DWARF "Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF) +# Facebook T92898286 +option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF) +# End Facebook T92898286 + # Define an option controlling whether we should build for 32-bit on 64-bit # platforms, where supported. if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX")) diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst index 799ee34e..c9d5bab 100644 --- a/llvm/docs/CommandGuide/lit.rst +++ b/llvm/docs/CommandGuide/lit.rst @@ -151,6 +151,10 @@ EXECUTION OPTIONS feature that can be used to conditionally disable (or expect failure in) certain tests. +.. 
option:: --skip-test-time-recording + + Disable tracking the wall time individual tests take to execute. + .. option:: --time-tests Track the wall time individual tests take to execute and includes the results diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h index cada7f3..8017f09 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h @@ -49,8 +49,8 @@ public: MachineFunctionProperties::Property::Selected); } - InstructionSelect(CodeGenOptLevel OL); - InstructionSelect(); + InstructionSelect(CodeGenOptLevel OL = CodeGenOptLevel::Default, + char &PassID = ID); bool runOnMachineFunction(MachineFunction &MF) override; diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h index ce63dcc..7a13164 100644 --- a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h +++ b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h @@ -13,6 +13,7 @@ #ifndef LLVM_CODEGEN_RUNTIMELIBCALLS_H #define LLVM_CODEGEN_RUNTIMELIBCALLS_H +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/RuntimeLibcalls.h" #include "llvm/Support/AtomicOrdering.h" @@ -90,6 +91,9 @@ Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); /// UNKNOW_LIBCALL if there is none. Libcall getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); +/// Initialize the default condition code on the libcalls. +void initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs); + } // namespace RTLIB } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index d4a2166..9d9886f 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3431,16 +3431,20 @@ public: /// Override the default CondCode to be used to test the result of the /// comparison libcall against zero. 
+ /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD. void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { - Libcalls.setCmpLibcallCC(Call, CC); + CmpLibcallCCs[Call] = CC; } + /// Get the CondCode that's to be used to test the result of the comparison /// libcall against zero. + /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD. ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { - return Libcalls.getCmpLibcallCC(Call); + return CmpLibcallCCs[Call]; } + /// Set the CallingConv that should be used for the specified libcall. void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { Libcalls.setLibcallCallingConv(Call, CC); @@ -3630,6 +3634,10 @@ private: /// The list of libcalls that the target will use. RTLIB::RuntimeLibcallsInfo Libcalls; + /// The ISD::CondCode that should be used to test the result of each of the + /// comparison libcall against zero. + ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; + /// The bits of IndexedModeActions used to store the legalisation actions /// We store the data as | ML | MS | L | S | each taking 4 bits. 
enum IndexedModeActionsBits { diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 71b1e832..ca85ff3 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -769,9 +769,10 @@ class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, class AMDGPUDimSampleProfile<string opmod, AMDGPUDimProps dim, - AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> { + AMDGPUSampleVariant sample, + bit has_return = true> : AMDGPUDimProfile<opmod, dim> { let IsSample = true; - let RetTypes = [llvm_any_ty]; + let RetTypes = !if(has_return, [llvm_any_ty], []); let ExtraAddrArgs = sample.ExtraAddrArgs; let Offset = sample.Offset; let Bias = sample.Bias; @@ -780,6 +781,12 @@ class AMDGPUDimSampleProfile<string opmod, let LodClampMip = sample.LodOrClamp; } +class AMDGPUDimSampleNoReturnProfile<string opmod, + AMDGPUDimProps dim, + AMDGPUSampleVariant sample> + : AMDGPUDimSampleProfile<opmod, dim, sample, false> { +} + class AMDGPUDimNoSampleProfile<string opmod, AMDGPUDimProps dim, list<LLVMType> retty, @@ -970,6 +977,21 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = { AMDGPUImageDMaskIntrinsic; } + multiclass AMDGPUImageDimSampleNoReturnDims<string opmod, + AMDGPUSampleVariant sample> { + foreach dim = AMDGPUDims.NoMsaa in { + def !strconcat(NAME, "_", dim.Name, "_nortn") : AMDGPUImageDimIntrinsic< + AMDGPUDimSampleNoReturnProfile<opmod, dim, sample>, + [IntrWillReturn], [SDNPMemOperand]>; + } + } + foreach sample = AMDGPUSampleVariants in { + defm int_amdgcn_image_sample # sample.LowerCaseMod + : AMDGPUImageDimSampleNoReturnDims< + "SAMPLE" # sample.UpperCaseMod # "_nortn", sample>, + AMDGPUImageDMaskIntrinsic; + } + defm int_amdgcn_image_getlod : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>, AMDGPUImageDMaskIntrinsic; diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 3057bff..b3648f5 
100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -15,7 +15,6 @@ #define LLVM_IR_RUNTIME_LIBCALLS_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/IR/CallingConv.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/TargetParser/Triple.h" @@ -41,7 +40,6 @@ enum Libcall { struct RuntimeLibcallsInfo { explicit RuntimeLibcallsInfo(const Triple &TT) { initLibcalls(TT); - initCmpLibcallCCs(); } /// Rename the default libcall routine name for the specified libcall. @@ -59,18 +57,6 @@ struct RuntimeLibcallsInfo { return LibcallRoutineNames[Call]; } - /// Override the default CondCode to be used to test the result of the - /// comparison libcall against zero. - void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { - CmpLibcallCCs[Call] = CC; - } - - /// Get the CondCode that's to be used to test the result of the comparison - /// libcall against zero. - ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { - return CmpLibcallCCs[Call]; - } - /// Set the CallingConv that should be used for the specified libcall. void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { LibcallCallingConvs[Call] = CC; @@ -90,10 +76,6 @@ private: /// Stores the name each libcall. const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; - /// The ISD::CondCode that should be used to test the result of each of the - /// comparison libcall against zero. - ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; - /// Stores the CallingConv that should be used for each libcall. CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; @@ -112,9 +94,6 @@ private: return true; } - /// Sets default libcall calling conventions. - void initCmpLibcallCCs(); - /// Set default libcall names. If a target wants to opt-out of a libcall it /// should be placed here. 
void initLibcalls(const Triple &TT); diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h index 5038b87..14ceb76 100644 --- a/llvm/include/llvm/MC/TargetRegistry.h +++ b/llvm/include/llvm/MC/TargetRegistry.h @@ -23,6 +23,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/TargetParser/Triple.h" @@ -107,10 +108,6 @@ MCStreamer *createWasmStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> &&CE); -MCStreamer *createXCOFFStreamer(MCContext &Ctx, - std::unique_ptr<MCAsmBackend> &&TAB, - std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&CE); MCStreamer *createSPIRVStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, std::unique_ptr<MCObjectWriter> &&OW, @@ -194,41 +191,19 @@ public: std::unique_ptr<MCAsmBackend> &&TAB, std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> &&Emitter); - using GOFFStreamerCtorTy = - MCStreamer *(*)(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, - std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter); using MachOStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter, - bool DWARFMustBeAtTheEnd); + std::unique_ptr<MCCodeEmitter> &&Emitter); using COFFStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter, - bool IncrementalLinkerCompatible); - using WasmStreamerCtorTy = - MCStreamer *(*)(const Triple &T, MCContext &Ctx, - std::unique_ptr<MCAsmBackend> &&TAB, - std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> 
&&Emitter); using XCOFFStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> &&Emitter); - using SPIRVStreamerCtorTy = - MCStreamer *(*)(const Triple &T, MCContext &Ctx, - std::unique_ptr<MCAsmBackend> &&TAB, - std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter); - - using DXContainerStreamerCtorTy = - MCStreamer *(*)(const Triple &T, MCContext &Ctx, - std::unique_ptr<MCAsmBackend> &&TAB, - std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter); using NullTargetStreamerCtorTy = MCTargetStreamer *(*)(MCStreamer &S); using AsmTargetStreamerCtorTy = MCTargetStreamer *(*)( @@ -330,13 +305,9 @@ private: // Construction functions for the various object formats, if registered. COFFStreamerCtorTy COFFStreamerCtorFn = nullptr; - GOFFStreamerCtorTy GOFFStreamerCtorFn = nullptr; MachOStreamerCtorTy MachOStreamerCtorFn = nullptr; ELFStreamerCtorTy ELFStreamerCtorFn = nullptr; - WasmStreamerCtorTy WasmStreamerCtorFn = nullptr; XCOFFStreamerCtorTy XCOFFStreamerCtorFn = nullptr; - SPIRVStreamerCtorTy SPIRVStreamerCtorFn = nullptr; - DXContainerStreamerCtorTy DXContainerStreamerCtorFn = nullptr; /// Construction function for this target's null TargetStreamer, if /// registered (default = nullptr). @@ -555,100 +526,29 @@ public: /// \param TAB The target assembler backend object. Takes ownership. /// \param OW The stream object. /// \param Emitter The target independent assembler object.Takes ownership. - /// \param RelaxAll Relax all fixups? 
+ MCStreamer *createMCObjectStreamer(const Triple &T, MCContext &Ctx, + std::unique_ptr<MCAsmBackend> TAB, + std::unique_ptr<MCObjectWriter> OW, + std::unique_ptr<MCCodeEmitter> Emitter, + const MCSubtargetInfo &STI) const; + LLVM_DEPRECATED("Use the overload without the 3 trailing bool", "") MCStreamer *createMCObjectStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> &&Emitter, - const MCSubtargetInfo &STI, bool, - bool IncrementalLinkerCompatible, - bool DWARFMustBeAtTheEnd) const { - MCStreamer *S = nullptr; - switch (T.getObjectFormat()) { - case Triple::UnknownObjectFormat: - llvm_unreachable("Unknown object format"); - case Triple::COFF: - assert((T.isOSWindows() || T.isUEFI()) && - "only Windows and UEFI COFF are supported"); - S = COFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), IncrementalLinkerCompatible); - break; - case Triple::MachO: - if (MachOStreamerCtorFn) - S = MachOStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), DWARFMustBeAtTheEnd); - else - S = createMachOStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), DWARFMustBeAtTheEnd); - break; - case Triple::ELF: - if (ELFStreamerCtorFn) - S = ELFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - else - S = createELFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - break; - case Triple::Wasm: - if (WasmStreamerCtorFn) - S = WasmStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - else - S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - break; - case Triple::GOFF: - if (GOFFStreamerCtorFn) - S = GOFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - else - S = createGOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - break; - case Triple::XCOFF: - if (XCOFFStreamerCtorFn) - S = 
XCOFFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - else - S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - break; - case Triple::SPIRV: - if (SPIRVStreamerCtorFn) - S = SPIRVStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - else - S = createSPIRVStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - break; - case Triple::DXContainer: - if (DXContainerStreamerCtorFn) - S = DXContainerStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - else - S = createDXContainerStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter)); - break; - } - if (ObjectTargetStreamerCtorFn) - ObjectTargetStreamerCtorFn(*S, STI); - return S; - } + const MCSubtargetInfo &STI, bool, bool, + bool) const; MCStreamer *createAsmStreamer(MCContext &Ctx, std::unique_ptr<formatted_raw_ostream> OS, - bool IsVerboseAsm, bool UseDwarfDirectory, - MCInstPrinter *InstPrint, - std::unique_ptr<MCCodeEmitter> &&CE, - std::unique_ptr<MCAsmBackend> &&TAB, - bool ShowInst) const { - formatted_raw_ostream &OSRef = *OS; - MCStreamer *S = llvm::createAsmStreamer( - Ctx, std::move(OS), IsVerboseAsm, UseDwarfDirectory, InstPrint, - std::move(CE), std::move(TAB), ShowInst); - createAsmTargetStreamer(*S, OSRef, InstPrint, IsVerboseAsm); - return S; - } + MCInstPrinter *IP, + std::unique_ptr<MCCodeEmitter> CE, + std::unique_ptr<MCAsmBackend> TAB) const; + MCStreamer * + createAsmStreamer(MCContext &Ctx, std::unique_ptr<formatted_raw_ostream> OS, + bool IsVerboseAsm, bool UseDwarfDirectory, + MCInstPrinter *IP, std::unique_ptr<MCCodeEmitter> &&CE, + std::unique_ptr<MCAsmBackend> &&TAB, bool ShowInst) const; MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, @@ -1011,10 +911,6 @@ struct TargetRegistry { T.COFFStreamerCtorFn = Fn; } - static void RegisterGOFFStreamer(Target &T, Target::GOFFStreamerCtorTy Fn) { - T.GOFFStreamerCtorFn = Fn; - } 
- static void RegisterMachOStreamer(Target &T, Target::MachOStreamerCtorTy Fn) { T.MachOStreamerCtorFn = Fn; } @@ -1023,18 +919,6 @@ struct TargetRegistry { T.ELFStreamerCtorFn = Fn; } - static void RegisterSPIRVStreamer(Target &T, Target::SPIRVStreamerCtorTy Fn) { - T.SPIRVStreamerCtorFn = Fn; - } - - static void RegisterDXContainerStreamer(Target &T, Target::DXContainerStreamerCtorTy Fn) { - T.DXContainerStreamerCtorFn = Fn; - } - - static void RegisterWasmStreamer(Target &T, Target::WasmStreamerCtorTy Fn) { - T.WasmStreamerCtorFn = Fn; - } - static void RegisterXCOFFStreamer(Target &T, Target::XCOFFStreamerCtorTy Fn) { T.XCOFFStreamerCtorFn = Fn; } diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index f168fdf..dfffe5c 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -76,6 +76,7 @@ class Context; class Function; class Instruction; class LoadInst; +class StoreInst; class User; class Value; @@ -172,10 +173,11 @@ protected: /// order. llvm::Value *Val = nullptr; - friend class Context; // For getting `Val`. - friend class User; // For getting `Val`. - friend class Use; // For getting `Val`. - friend class LoadInst; // For getting `Val`. + friend class Context; // For getting `Val`. + friend class User; // For getting `Val`. + friend class Use; // For getting `Val`. + friend class LoadInst; // For getting `Val`. + friend class StoreInst; // For getting `Val`. /// All values point to the context. Context &Ctx; @@ -495,7 +497,8 @@ protected: /// A SandboxIR Instruction may map to multiple LLVM IR Instruction. This /// returns its topmost LLVM IR instruction. llvm::Instruction *getTopmostLLVMInstruction() const; - friend class LoadInst; // For getTopmostLLVMInstruction(). + friend class LoadInst; // For getTopmostLLVMInstruction(). + friend class StoreInst; // For getTopmostLLVMInstruction(). 
/// \Returns the LLVM IR Instructions that this SandboxIR maps to in program /// order. @@ -599,6 +602,43 @@ public: #endif }; +class StoreInst final : public Instruction { + /// Use StoreInst::create(). + StoreInst(llvm::StoreInst *SI, Context &Ctx) + : Instruction(ClassID::Store, Opcode::Store, SI, Ctx) {} + friend Context; // for StoreInst() + Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { + return getOperandUseDefault(OpIdx, Verify); + } + SmallVector<llvm::Instruction *, 1> getLLVMInstrs() const final { + return {cast<llvm::Instruction>(Val)}; + } + +public: + unsigned getUseOperandNo(const Use &Use) const final { + return getUseOperandNoDefault(Use); + } + unsigned getNumOfIRInstrs() const final { return 1u; } + static StoreInst *create(Value *V, Value *Ptr, MaybeAlign Align, + Instruction *InsertBefore, Context &Ctx); + static StoreInst *create(Value *V, Value *Ptr, MaybeAlign Align, + BasicBlock *InsertAtEnd, Context &Ctx); + /// For isa/dyn_cast. + static bool classof(const Value *From); + Value *getValueOperand() const; + Value *getPointerOperand() const; + Align getAlign() const { return cast<llvm::StoreInst>(Val)->getAlign(); } + bool isSimple() const { return cast<llvm::StoreInst>(Val)->isSimple(); } + bool isUnordered() const { return cast<llvm::StoreInst>(Val)->isUnordered(); } +#ifndef NDEBUG + void verify() const final { + assert(isa<llvm::StoreInst>(Val) && "Expected StoreInst!"); + } + void dump(raw_ostream &OS) const override; + LLVM_DUMP_METHOD void dump() const override; +#endif +}; + /// An LLLVM Instruction that has no SandboxIR equivalent class gets mapped to /// an OpaqueInstr. 
class OpaqueInst : public sandboxir::Instruction { @@ -734,6 +774,8 @@ protected: LoadInst *createLoadInst(llvm::LoadInst *LI); friend LoadInst; // For createLoadInst() + StoreInst *createStoreInst(llvm::StoreInst *SI); + friend StoreInst; // For createStoreInst() public: Context(LLVMContext &LLVMCtx) diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def index e1ed3cd..90365ca 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def +++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def @@ -26,6 +26,7 @@ DEF_USER(Constant, Constant) // ClassID, Opcode(s), Class DEF_INSTR(Opaque, OP(Opaque), OpaqueInst) DEF_INSTR(Load, OP(Load), LoadInst) +DEF_INSTR(Store, OP(Store), StoreInst) #ifdef DEF_VALUE #undef DEF_VALUE diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h index 6eb9293..31f7df1 100644 --- a/llvm/include/llvm/Support/TimeProfiler.h +++ b/llvm/include/llvm/Support/TimeProfiler.h @@ -83,28 +83,16 @@ namespace llvm { class raw_pwrite_stream; -struct TimeTraceMetadata { - std::string Detail; - // Source file and line number information for the event. - std::string File; - int Line; - - bool isEmpty() const { return Detail.empty() && File.empty(); } -}; - struct TimeTraceProfiler; TimeTraceProfiler *getTimeTraceProfilerInstance(); -bool isTimeTraceVerbose(); - struct TimeTraceProfilerEntry; /// Initialize the time trace profiler. /// This sets up the global \p TimeTraceProfilerInstance /// variable to be the profiler instance. void timeTraceProfilerInitialize(unsigned TimeTraceGranularity, - StringRef ProcName, - bool TimeTraceVerbose = false); + StringRef ProcName); /// Cleanup the time trace profiler, if it was initialized. 
void timeTraceProfilerCleanup(); @@ -140,10 +128,6 @@ TimeTraceProfilerEntry * timeTraceProfilerBegin(StringRef Name, llvm::function_ref<std::string()> Detail); -TimeTraceProfilerEntry * -timeTraceProfilerBegin(StringRef Name, - llvm::function_ref<TimeTraceMetadata()> MetaData); - /// Manually begin a time section, with the given \p Name and \p Detail. /// This starts Async Events having \p Name as a category which is shown /// separately from other traces. See @@ -180,11 +164,6 @@ public: if (getTimeTraceProfilerInstance() != nullptr) Entry = timeTraceProfilerBegin(Name, Detail); } - TimeTraceScope(StringRef Name, - llvm::function_ref<TimeTraceMetadata()> Metadata) { - if (getTimeTraceProfilerInstance() != nullptr) - Entry = timeTraceProfilerBegin(Name, Metadata); - } ~TimeTraceScope() { if (getTimeTraceProfilerInstance() != nullptr) timeTraceProfilerEnd(Entry); diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index 0e4ad87..92798cb 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -175,8 +175,12 @@ X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16", 34) X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect", 35) // Below Features has some missings comparing to gcc, it's because gcc has some // not one-to-one mapped in llvm. -X86_FEATURE_COMPAT(3DNOW, "3dnow", 0) -X86_FEATURE (3DNOWA, "3dnowa") + +// FIXME: dummy features were added to keep the numeric values of later features +// stable. Since the values need to be ABI stable, they should be changed to +// have explicitly assigned values, and then these dummy features removed. 
+X86_FEATURE (DUMMYFEATURE1, "__dummyfeature1") +X86_FEATURE (DUMMYFEATURE2, "__dummyfeature2") X86_FEATURE_COMPAT(ADX, "adx", 0) X86_FEATURE (64BIT, "64bit") X86_FEATURE_COMPAT(CLDEMOTE, "cldemote", 0) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b46a6d3..91b5703 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1669,8 +1669,8 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) { } /// Returns true if function begin and end labels should be emitted. -static bool needFuncLabels(const MachineFunction &MF) { - MachineModuleInfo &MMI = MF.getMMI(); +static bool needFuncLabels(const MachineFunction &MF, + const MachineModuleInfo &MMI) { if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI.hasDebugInfo() || MF.getFunction().hasMetadata(LLVMContext::MD_pcsections)) @@ -1944,7 +1944,7 @@ void AsmPrinter::emitFunctionBody() { // are automatically sized. bool EmitFunctionSize = MAI->hasDotTypeDotSizeDirective() && !TT.isWasm(); - if (needFuncLabels(*MF) || EmitFunctionSize) { + if (needFuncLabels(*MF, *MMI) || EmitFunctionSize) { // Create a symbol for the end of function. 
CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->emitLabel(CurrentFnEnd); @@ -2587,8 +2587,9 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { bool NeedsLocalForSize = MAI->needsLocalForSize(); if (F.hasFnAttribute("patchable-function-entry") || F.hasFnAttribute("function-instrument") || - F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF) || - NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection || + F.hasFnAttribute("xray-instruction-threshold") || + needFuncLabels(MF, *MMI) || NeedsLocalForSize || + MF.getTarget().Options.EmitStackSizeSection || MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 49f3fc1..087ee02 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -90,7 +90,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - const MCAsmInfo &MAI = *MF->getMMI().getContext().getAsmInfo(); + const MCAsmInfo &MAI = *MF->getContext().getAsmInfo(); if (MAI.getExceptionHandlingType() != ExceptionHandling::None) shouldEmitCFI = MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves); diff --git a/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/llvm/lib/CodeGen/CFGuardLongjmp.cpp index b5d88a7..04de011 100644 --- a/llvm/lib/CodeGen/CFGuardLongjmp.cpp +++ b/llvm/lib/CodeGen/CFGuardLongjmp.cpp @@ -62,7 +62,7 @@ FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); } bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) { // Skip modules for which the cfguard flag is not set. 
- if (!MF.getMMI().getModule()->getModuleFlag("cfguard")) + if (!MF.getFunction().getParent()->getModuleFlag("cfguard")) return false; // Skip functions that do not have calls to _setjmp. diff --git a/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/llvm/lib/CodeGen/EHContGuardCatchret.cpp index f7c6580..cd1cdb0 100644 --- a/llvm/lib/CodeGen/EHContGuardCatchret.cpp +++ b/llvm/lib/CodeGen/EHContGuardCatchret.cpp @@ -62,7 +62,7 @@ FunctionPass *llvm::createEHContGuardCatchretPass() { bool EHContGuardCatchret::runOnMachineFunction(MachineFunction &MF) { // Skip modules for which the ehcontguard flag is not set. - if (!MF.getMMI().getModule()->getModuleFlag("ehcontguard")) + if (!MF.getFunction().getParent()->getModuleFlag("ehcontguard")) return false; // Skip functions that do not have catchret diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 4cb1d01f..9a27728 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -62,14 +62,8 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) -InstructionSelect::InstructionSelect(CodeGenOptLevel OL) - : MachineFunctionPass(ID), OptLevel(OL) {} - -// In order not to crash when calling getAnalysis during testing with -run-pass -// we use the default opt level here instead of None, so that the addRequired() -// calls are made in getAnalysisUsage(). 
-InstructionSelect::InstructionSelect() - : MachineFunctionPass(ID), OptLevel(CodeGenOptLevel::Default) {} +InstructionSelect::InstructionSelect(CodeGenOptLevel OL, char &PassID) + : MachineFunctionPass(PassID), OptLevel(OL) {} void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); diff --git a/llvm/lib/CodeGen/KCFI.cpp b/llvm/lib/CodeGen/KCFI.cpp index 91c6ac2..af19319 100644 --- a/llvm/lib/CodeGen/KCFI.cpp +++ b/llvm/lib/CodeGen/KCFI.cpp @@ -89,7 +89,7 @@ bool KCFI::emitCheck(MachineBasicBlock &MBB, } bool KCFI::runOnMachineFunction(MachineFunction &MF) { - const Module *M = MF.getMMI().getModule(); + const Module *M = MF.getFunction().getParent(); if (!M->getModuleFlag("kcfi")) return false; diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 1d13173..819187c 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -167,25 +167,12 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer( if (Options.MCOptions.ShowMCEncoding) MCE.reset(getTarget().createMCCodeEmitter(MII, Context)); - bool UseDwarfDirectory = false; - switch (Options.MCOptions.MCUseDwarfDirectory) { - case MCTargetOptions::DisableDwarfDirectory: - UseDwarfDirectory = false; - break; - case MCTargetOptions::EnableDwarfDirectory: - UseDwarfDirectory = true; - break; - case MCTargetOptions::DefaultDwarfDirectory: - UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault(); - break; - } - std::unique_ptr<MCAsmBackend> MAB( getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); auto FOut = std::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( - Context, std::move(FOut), Options.MCOptions.AsmVerbose, - UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB), + Context, std::move(FOut), Options.MCOptions.AsmVerbose, true, + InstPrinter, std::move(MCE), std::move(MAB), 
Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); break; @@ -208,9 +195,7 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer( T, Context, std::unique_ptr<MCAsmBackend>(MAB), DwoOut ? MAB->createDwoObjectWriter(Out, *DwoOut) : MAB->createObjectWriter(Out), - std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll, - Options.MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); + std::unique_ptr<MCCodeEmitter>(MCE), STI)); break; } case CodeGenFileType::Null: @@ -276,17 +261,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, const MCRegisterInfo &MRI = *getMCRegisterInfo(); std::unique_ptr<MCCodeEmitter> MCE( getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx)); - std::unique_ptr<MCAsmBackend> MAB( - getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); + MCAsmBackend *MAB = + getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); if (!MCE || !MAB) return true; const Triple &T = getTargetTriple(); std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( - T, *Ctx, std::move(MAB), MAB->createObjectWriter(Out), std::move(MCE), - STI, Options.MCOptions.MCRelaxAll, - Options.MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); + T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out), + std::move(MCE), STI)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
FunctionPass *Printer = diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 8040f1e..2be7fc9 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -574,6 +574,39 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { } } +void RTLIB::initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs) { + std::fill(CmpLibcallCCs, CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL, + ISD::SETCC_INVALID); + CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ; + CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ; + CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE; + CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE; + CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE; + CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE; + CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE; + CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT; + CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT; + CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT; + CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT; + CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE; + CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE; + CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE; + CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE; + CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT; + CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT; + CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT; + CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT; + CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE; +} + /// NOTE: The TargetMachine owns TLOF. 
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm), Libcalls(TM.getTargetTriple()) { @@ -608,6 +641,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) MinCmpXchgSizeInBits = 0; SupportsUnalignedAtomics = false; + + RTLIB::initCmpLibcallCCs(CmpLibcallCCs); } void TargetLoweringBase::initActions() { diff --git a/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp index 74df2eb..45a62da 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp @@ -62,6 +62,8 @@ Error DwarfStreamer::init(Triple TheTriple, TripleName.c_str()); MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); + MCOptions.AsmVerbose = true; + MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory; MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); if (!MAI) return createStringError(std::errc::invalid_argument, @@ -110,8 +112,7 @@ Error DwarfStreamer::init(Triple TheTriple, MS = TheTarget->createMCObjectStreamer( TheTriple, *MC, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(OutFile), std::unique_ptr<MCCodeEmitter>(MCE), - *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false); + *MSTI); break; } } diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp index b9edcb6..f790766 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp @@ -41,6 +41,8 @@ Error DwarfEmitterImpl::init(Triple TheTriple, TripleName.c_str()); MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); + MCOptions.AsmVerbose = true; + MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory; MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); if (!MAI) return createStringError(std::errc::invalid_argument, @@ -89,8 +91,7 @@ Error 
DwarfEmitterImpl::init(Triple TheTriple, MS = TheTarget->createMCObjectStreamer( TheTriple, *MC, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(OutFile), std::unique_ptr<MCCodeEmitter>(MCE), - *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false); + *MSTI); break; } } diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 8bf199f..91e0e0c 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -82,7 +82,6 @@ add_llvm_component_library(LLVMCore ${LLVM_PTHREAD_LIB} DEPENDS - vt_gen intrinsics_gen LINK_COMPONENTS diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index de3db55..8ce0caa 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -344,36 +344,3 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) { } } } - -void RuntimeLibcallsInfo::initCmpLibcallCCs() { - std::fill(CmpLibcallCCs, CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL, - ISD::SETCC_INVALID); - CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ; - CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ; - CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ; - CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ; - CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE; - CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE; - CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE; - CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE; - CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE; - CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE; - CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE; - CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE; - CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT; - CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT; - CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT; - CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT; - CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE; - CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE; - CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE; - CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE; - 
CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT; - CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT; - CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT; - CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT; - CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE; - CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE; - CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE; - CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE; -} diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index d303f22..bb3c9f7 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1360,7 +1360,6 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) { RTLIB::RuntimeLibcallsInfo Libcalls(TT); - SmallVector<const char *> LibcallSymbols; copy_if(Libcalls.getLibcallNames(), std::back_inserter(LibcallSymbols), [](const char *Name) { return Name; }); diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 24209e4..db93a33 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -55,9 +55,9 @@ class MCAsmStreamer final : public MCStreamer { raw_svector_ostream CommentStream; raw_null_ostream NullStream; - unsigned IsVerboseAsm : 1; - unsigned ShowInst : 1; - unsigned UseDwarfDirectory : 1; + bool IsVerboseAsm = false; + bool ShowInst = false; + bool UseDwarfDirectory = false; void EmitRegisterName(int64_t Register); void PrintQuotedString(StringRef Data, raw_ostream &OS) const; @@ -72,24 +72,40 @@ class MCAsmStreamer final : public MCStreamer { public: MCAsmStreamer(MCContext &Context, std::unique_ptr<formatted_raw_ostream> os, - bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *printer, std::unique_ptr<MCCodeEmitter> emitter, - std::unique_ptr<MCAsmBackend> asmbackend, bool showInst) + std::unique_ptr<MCAsmBackend> asmbackend) : MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner), MAI(Context.getAsmInfo()), InstPrinter(printer), Assembler(std::make_unique<MCAssembler>( Context, std::move(asmbackend), 
std::move(emitter), (asmbackend) ? asmbackend->createObjectWriter(NullStream) : nullptr)), - CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm), - ShowInst(showInst), UseDwarfDirectory(useDwarfDirectory) { + CommentStream(CommentToEmit) { assert(InstPrinter); - if (IsVerboseAsm) - InstPrinter->setCommentStream(CommentStream); if (Assembler->getBackendPtr()) setAllowAutoPadding(Assembler->getBackend().allowAutoPadding()); Context.setUseNamesOnTempLabels(true); + + auto *TO = Context.getTargetOptions(); + if (!TO) + return; + IsVerboseAsm = TO->AsmVerbose; + if (IsVerboseAsm) + InstPrinter->setCommentStream(CommentStream); + ShowInst = TO->ShowMCInst; + switch (TO->MCUseDwarfDirectory) { + case MCTargetOptions::DisableDwarfDirectory: + UseDwarfDirectory = false; + break; + case MCTargetOptions::EnableDwarfDirectory: + UseDwarfDirectory = true; + break; + case MCTargetOptions::DefaultDwarfDirectory: + UseDwarfDirectory = + Context.getAsmInfo()->enableDwarfFileDirectoryDefault(); + break; + } } MCAssembler &getAssembler() { return *Assembler; } @@ -2646,7 +2662,6 @@ MCStreamer *llvm::createAsmStreamer(MCContext &Context, std::unique_ptr<MCCodeEmitter> &&CE, std::unique_ptr<MCAsmBackend> &&MAB, bool ShowInst) { - return new MCAsmStreamer(Context, std::move(OS), isVerboseAsm, - useDwarfDirectory, IP, std::move(CE), std::move(MAB), - ShowInst); + return new MCAsmStreamer(Context, std::move(OS), IP, std::move(CE), + std::move(MAB)); } diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index c8bc819..5231d10 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -54,9 +54,6 @@ private: /// need for local relocations. False by default. bool LabelSections; - bool DWARFMustBeAtTheEnd; - bool CreatedADWARFSection; - /// HasSectionLabel - map of which sections have already had a non-local /// label emitted to them. Used so we don't emit extraneous linker local /// labels in the middle of the section. 
@@ -70,16 +67,13 @@ private: public: MCMachOStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, std::unique_ptr<MCObjectWriter> OW, - std::unique_ptr<MCCodeEmitter> Emitter, - bool DWARFMustBeAtTheEnd, bool label) + std::unique_ptr<MCCodeEmitter> Emitter, bool label) : MCObjectStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)), - LabelSections(label), DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), - CreatedADWARFSection(false) {} + LabelSections(label) {} /// state management void reset() override { - CreatedADWARFSection = false; HasSectionLabel.clear(); MCObjectStreamer::reset(); } @@ -141,48 +135,9 @@ public: } // end anonymous namespace. -static bool canGoAfterDWARF(const MCSectionMachO &MSec) { - // These sections are created by the assembler itself after the end of - // the .s file. - StringRef SegName = MSec.getSegmentName(); - StringRef SecName = MSec.getName(); - - if (SegName == "__LD" && SecName == "__compact_unwind") - return true; - - if (SegName == "__IMPORT") { - if (SecName == "__jump_table") - return true; - - if (SecName == "__pointers") - return true; - } - - if (SegName == "__TEXT" && SecName == "__eh_frame") - return true; - - if (SegName == "__DATA" && - (SecName == "__llvm_addrsig" || SecName == "__nl_symbol_ptr" || - SecName == "__thread_ptr")) - return true; - if (SegName == "__LLVM" && (SecName == "__cg_profile")) - return true; - - if (SegName == "__DATA" && SecName == "__auth_ptr") - return true; - - return false; -} - void MCMachOStreamer::changeSection(MCSection *Section, uint32_t Subsection) { // Change the section normally. 
- bool Created = changeSectionImpl(Section, Subsection); - const MCSectionMachO &MSec = *cast<MCSectionMachO>(Section); - StringRef SegName = MSec.getSegmentName(); - if (SegName == "__DWARF") - CreatedADWARFSection = true; - else if (Created && DWARFMustBeAtTheEnd && !canGoAfterDWARF(MSec)) - assert(!CreatedADWARFSection && "Creating regular section after DWARF"); + changeSectionImpl(Section, Subsection); // Output a linker-local symbol so we don't need section-relative local // relocations. The linker hates us when we do that. @@ -576,9 +531,8 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, std::unique_ptr<MCCodeEmitter> &&CE, bool DWARFMustBeAtTheEnd, bool LabelSections) { - MCMachOStreamer *S = - new MCMachOStreamer(Context, std::move(MAB), std::move(OW), std::move(CE), - DWARFMustBeAtTheEnd, LabelSections); + MCMachOStreamer *S = new MCMachOStreamer( + Context, std::move(MAB), std::move(OW), std::move(CE), LabelSections); const Triple &Target = Context.getTargetTriple(); S->emitVersionForTarget( Target, Context.getObjectFileInfo()->getSDKVersion(), diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index d05712b..992b69f 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -658,7 +658,7 @@ private: bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" - bool parseDirectiveAbort(); // ".abort" + bool parseDirectiveAbort(SMLoc DirectiveLoc); // ".abort" bool parseDirectiveInclude(); // ".include" bool parseDirectiveIncbin(); // ".incbin" @@ -2120,7 +2120,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, case DK_LCOMM: return parseDirectiveComm(/*IsLocal=*/true); case DK_ABORT: - return parseDirectiveAbort(); + return parseDirectiveAbort(IDLoc); case DK_INCLUDE: return parseDirectiveInclude(); case DK_INCBIN: @@ -5095,21 +5095,17 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) { /// parseDirectiveAbort /// ::= .abort [... message ...] 
-bool AsmParser::parseDirectiveAbort() { - // FIXME: Use loc from directive. - SMLoc Loc = getLexer().getLoc(); - +bool AsmParser::parseDirectiveAbort(SMLoc DirectiveLoc) { StringRef Str = parseStringToEndOfStatement(); if (parseEOL()) return true; if (Str.empty()) - return Error(Loc, ".abort detected. Assembly stopping."); - else - return Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); - // FIXME: Actually abort assembly here. + return Error(DirectiveLoc, ".abort detected. Assembly stopping"); - return false; + // FIXME: Actually abort assembly here. + return Error(DirectiveLoc, + ".abort '" + Str + "' detected. Assembly stopping"); } /// parseDirectiveInclude diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp index 538244c..a14d3bc 100644 --- a/llvm/lib/MC/MCWinCOFFStreamer.cpp +++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSymbolCOFF.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -45,7 +46,11 @@ MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, std::unique_ptr<MCCodeEmitter> CE, std::unique_ptr<MCObjectWriter> OW) : MCObjectStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)), - CurSymbol(nullptr) {} + CurSymbol(nullptr) { + auto *TO = Context.getTargetOptions(); + if (TO && TO->MCIncrementalLinkerCompatible) + getAssembler().setIncrementalLinkerCompatible(true); +} void MCWinCOFFStreamer::emitInstToData(const MCInst &Inst, const MCSubtargetInfo &STI) { diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 175d7d6..9cd46e5 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -159,15 +159,6 @@ void MCXCOFFStreamer::emitInstToData(const MCInst &Inst, DF->getContents().append(Code.begin(), Code.end()); } -MCStreamer 
*llvm::createXCOFFStreamer(MCContext &Context, - std::unique_ptr<MCAsmBackend> &&MAB, - std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&CE) { - MCXCOFFStreamer *S = new MCXCOFFStreamer(Context, std::move(MAB), - std::move(OW), std::move(CE)); - return S; -} - void MCXCOFFStreamer::emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size, MCSymbol *CsectSym, diff --git a/llvm/lib/MC/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp index 0aa4891..4190117 100644 --- a/llvm/lib/MC/TargetRegistry.cpp +++ b/llvm/lib/MC/TargetRegistry.cpp @@ -9,6 +9,12 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <vector> @@ -17,6 +23,95 @@ using namespace llvm; // Clients are responsible for avoid race conditions in registration. 
static Target *FirstTarget = nullptr; +MCStreamer *Target::createMCObjectStreamer( + const Triple &T, MCContext &Ctx, std::unique_ptr<MCAsmBackend> TAB, + std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter, + const MCSubtargetInfo &STI) const { + MCStreamer *S = nullptr; + switch (T.getObjectFormat()) { + case Triple::UnknownObjectFormat: + llvm_unreachable("Unknown object format"); + case Triple::COFF: + assert((T.isOSWindows() || T.isUEFI()) && + "only Windows and UEFI COFF are supported"); + S = COFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + break; + case Triple::MachO: + if (MachOStreamerCtorFn) + S = MachOStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + else + S = createMachOStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter), false); + break; + case Triple::ELF: + if (ELFStreamerCtorFn) + S = ELFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + else + S = createELFStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + break; + case Triple::Wasm: + S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + break; + case Triple::GOFF: + S = createGOFFStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + break; + case Triple::XCOFF: + S = XCOFFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + break; + case Triple::SPIRV: + S = createSPIRVStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + break; + case Triple::DXContainer: + S = createDXContainerStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter)); + break; + } + if (ObjectTargetStreamerCtorFn) + ObjectTargetStreamerCtorFn(*S, STI); + return S; +} + +MCStreamer *Target::createMCObjectStreamer( + const Triple &T, MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, + std::unique_ptr<MCObjectWriter> &&OW, + std::unique_ptr<MCCodeEmitter> &&Emitter, const MCSubtargetInfo 
&STI, bool, + bool, bool) const { + return createMCObjectStreamer(T, Ctx, std::move(TAB), std::move(OW), + std::move(Emitter), STI); +} + +MCStreamer *Target::createAsmStreamer(MCContext &Ctx, + std::unique_ptr<formatted_raw_ostream> OS, + MCInstPrinter *IP, + std::unique_ptr<MCCodeEmitter> CE, + std::unique_ptr<MCAsmBackend> TAB) const { + formatted_raw_ostream &OSRef = *OS; + MCStreamer *S = llvm::createAsmStreamer(Ctx, std::move(OS), false, false, IP, + std::move(CE), std::move(TAB), false); + auto *TO = Ctx.getTargetOptions(); + createAsmTargetStreamer(*S, OSRef, IP, TO && TO->AsmVerbose); + return S; +} + +MCStreamer *Target::createAsmStreamer(MCContext &Ctx, + std::unique_ptr<formatted_raw_ostream> OS, + bool IsVerboseAsm, bool UseDwarfDirectory, + MCInstPrinter *IP, + std::unique_ptr<MCCodeEmitter> &&CE, + std::unique_ptr<MCAsmBackend> &&TAB, + bool ShowInst) const { + return createAsmStreamer(Ctx, std::move(OS), IP, std::move(CE), + std::move(TAB)); +} + iterator_range<TargetRegistry::iterator> TargetRegistry::targets() { return make_range(iterator(FirstTarget), iterator()); } diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index f392704..209b677 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -496,6 +496,50 @@ void LoadInst::dump() const { dump(dbgs()); dbgs() << "\n"; } +#endif // NDEBUG +StoreInst *StoreInst::create(Value *V, Value *Ptr, MaybeAlign Align, + Instruction *InsertBefore, Context &Ctx) { + llvm::Instruction *BeforeIR = InsertBefore->getTopmostLLVMInstruction(); + auto &Builder = Ctx.getLLVMIRBuilder(); + Builder.SetInsertPoint(BeforeIR); + auto *NewSI = + Builder.CreateAlignedStore(V->Val, Ptr->Val, Align, /*isVolatile=*/false); + auto *NewSBI = Ctx.createStoreInst(NewSI); + return NewSBI; +} +StoreInst *StoreInst::create(Value *V, Value *Ptr, MaybeAlign Align, + BasicBlock *InsertAtEnd, Context &Ctx) { + auto *InsertAtEndIR = cast<llvm::BasicBlock>(InsertAtEnd->Val); + 
auto &Builder = Ctx.getLLVMIRBuilder(); + Builder.SetInsertPoint(InsertAtEndIR); + auto *NewSI = + Builder.CreateAlignedStore(V->Val, Ptr->Val, Align, /*isVolatile=*/false); + auto *NewSBI = Ctx.createStoreInst(NewSI); + return NewSBI; +} + +bool StoreInst::classof(const Value *From) { + return From->getSubclassID() == ClassID::Store; +} + +Value *StoreInst::getValueOperand() const { + return Ctx.getValue(cast<llvm::StoreInst>(Val)->getValueOperand()); +} + +Value *StoreInst::getPointerOperand() const { + return Ctx.getValue(cast<llvm::StoreInst>(Val)->getPointerOperand()); +} + +#ifndef NDEBUG +void StoreInst::dump(raw_ostream &OS) const { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); +} + +void StoreInst::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} void OpaqueInst::dump(raw_ostream &OS) const { dumpCommonPrefix(OS); @@ -581,7 +625,8 @@ Value *Context::registerValue(std::unique_ptr<Value> &&VPtr) { assert(VPtr->getSubclassID() != Value::ClassID::User && "Can't register a user!"); Value *V = VPtr.get(); - auto Pair = LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)}); + [[maybe_unused]] auto Pair = + LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)}); assert(Pair.second && "Already exists!"); return V; } @@ -618,6 +663,11 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { It->second = std::unique_ptr<LoadInst>(new LoadInst(LLVMLd, *this)); return It->second.get(); } + case llvm::Instruction::Store: { + auto *LLVMSt = cast<llvm::StoreInst>(LLVMV); + It->second = std::unique_ptr<StoreInst>(new StoreInst(LLVMSt, *this)); + return It->second.get(); + } default: break; } @@ -641,6 +691,11 @@ LoadInst *Context::createLoadInst(llvm::LoadInst *LI) { return cast<LoadInst>(registerValue(std::move(NewPtr))); } +StoreInst *Context::createStoreInst(llvm::StoreInst *SI) { + auto NewPtr = std::unique_ptr<StoreInst>(new StoreInst(SI, *this)); + return cast<StoreInst>(registerValue(std::move(NewPtr))); +} + Value 
*Context::getValue(llvm::Value *V) const { auto It = LLVMValueToValueMap.find(V); if (It != LLVMValueToValueMap.end()) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index c201402..9612db7 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -73,20 +73,12 @@ struct llvm::TimeTraceProfilerEntry { const TimePointType Start; TimePointType End; const std::string Name; - TimeTraceMetadata Metadata; - + const std::string Detail; const bool AsyncEvent = false; TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt, bool Ae) - : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(), - AsyncEvent(Ae) { - Metadata.Detail = std::move(Dt); - } - - TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, - TimeTraceMetadata &&Mt, bool Ae) : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), - Metadata(std::move(Mt)), AsyncEvent(Ae) {} + Detail(std::move(Dt)), AsyncEvent(Ae) {} // Calculate timings for FlameGraph. Cast time points to microsecond precision // rather than casting duration. 
This avoids truncation issues causing inner @@ -105,12 +97,10 @@ struct llvm::TimeTraceProfilerEntry { }; struct llvm::TimeTraceProfiler { - TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "", - bool TimeTraceVerbose = false) + TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), ProcName(ProcName), Pid(sys::Process::getProcessId()), - Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity), - TimeTraceVerbose(TimeTraceVerbose) { + Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) { llvm::get_thread_name(ThreadName); } @@ -123,15 +113,6 @@ struct llvm::TimeTraceProfiler { return Stack.back().get(); } - TimeTraceProfilerEntry * - begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata, - bool AsyncEvent = false) { - Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>( - ClockType::now(), TimePointType(), std::move(Name), Metadata(), - AsyncEvent)); - return Stack.back().get(); - } - void end() { assert(!Stack.empty() && "Must call begin() first"); end(*Stack.back()); @@ -203,15 +184,8 @@ struct llvm::TimeTraceProfiler { J.attribute("dur", DurUs); } J.attribute("name", E.Name); - if (!E.Metadata.isEmpty()) { - J.attributeObject("args", [&] { - if (!E.Metadata.Detail.empty()) - J.attribute("detail", E.Metadata.Detail); - if (!E.Metadata.File.empty()) - J.attribute("file", E.Metadata.File); - if (E.Metadata.Line > 0) - J.attribute("line", E.Metadata.Line); - }); + if (!E.Detail.empty()) { + J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); } }); @@ -333,25 +307,14 @@ struct llvm::TimeTraceProfiler { // Minimum time granularity (in microseconds) const unsigned TimeTraceGranularity; - - // Make time trace capture verbose event details (e.g. source filenames). This - // can increase the size of the output by 2-3 times. 
- const bool TimeTraceVerbose; }; -bool llvm::isTimeTraceVerbose() { - return getTimeTraceProfilerInstance() && - getTimeTraceProfilerInstance()->TimeTraceVerbose; -} - void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, - StringRef ProcName, - bool TimeTraceVerbose) { + StringRef ProcName) { assert(TimeTraceProfilerInstance == nullptr && "Profiler should not be initialized"); TimeTraceProfilerInstance = new TimeTraceProfiler( - TimeTraceGranularity, llvm::sys::path::filename(ProcName), - TimeTraceVerbose); + TimeTraceGranularity, llvm::sys::path::filename(ProcName)); } // Removes all TimeTraceProfilerInstances. @@ -418,14 +381,6 @@ llvm::timeTraceProfilerBegin(StringRef Name, return nullptr; } -TimeTraceProfilerEntry * -llvm::timeTraceProfilerBegin(StringRef Name, - llvm::function_ref<TimeTraceMetadata()> Metadata) { - if (TimeTraceProfilerInstance != nullptr) - return TimeTraceProfilerInstance->begin(std::string(Name), Metadata, false); - return nullptr; -} - TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name, StringRef Detail) { if (TimeTraceProfilerInstance != nullptr) diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 63358c1..1e60ce9 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1728,8 +1728,7 @@ void AArch64AsmPrinter::LowerLOADauthptrstatic(const MachineInstr &MI) { assert(GAOp.getOffset() == 0 && "non-zero offset for $auth_ptr$ stub slots is not supported"); const MCSymbol *GASym = TM.getSymbol(GAOp.getGlobal()); - AuthPtrStubSym = - TLOF.getAuthPtrSlotSymbol(TM, &MF->getMMI(), GASym, Key, Disc); + AuthPtrStubSym = TLOF.getAuthPtrSlotSymbol(TM, MMI, GASym, Key, Disc); } else { assert(TM.getTargetTriple().isOSBinFormatMachO() && "LOADauthptrstatic is implemented only for MachO/ELF"); @@ -1740,8 +1739,7 @@ void AArch64AsmPrinter::LowerLOADauthptrstatic(const MachineInstr &MI) { 
assert(GAOp.getOffset() == 0 && "non-zero offset for $auth_ptr$ stub slots is not supported"); const MCSymbol *GASym = TM.getSymbol(GAOp.getGlobal()); - AuthPtrStubSym = - TLOF.getAuthPtrSlotSymbol(TM, &MF->getMMI(), GASym, Key, Disc); + AuthPtrStubSym = TLOF.getAuthPtrSlotSymbol(TM, MMI, GASym, Key, Disc); } MachineOperand StubMOHi = diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 6f885f4..2bbb499 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -17,6 +17,11 @@ class CCIfBigEndian<CCAction A> : class CCIfILP32<CCAction A> : CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("State.getMachineFunction()" + ".getSubtarget<AArch64Subtarget>().", F), + A>; //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention @@ -496,36 +501,44 @@ def CC_AArch64_GHC : CallingConv<[ let Entry = 1 in def CC_AArch64_Preserve_None : CallingConv<[ - // We can pass arguments in all general registers, except: - // - X8, used for sret - // - X16/X17, used by the linker as IP0/IP1 - // - X18, the platform register - // - X19, the base pointer - // - X29, the frame pointer - // - X30, the link register - // General registers are not preserved with the exception of - // FP, LR, and X18 - // Non-volatile registers are used first, so functions may call - // normal functions without saving and reloading arguments. - // X9 is assigned last as it is used in FrameLowering as the first - // choice for a scratch register. 
- CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23, - W24, W25, W26, W27, W28, - W0, W1, W2, W3, W4, W5, - W6, W7, W10, W11, - W12, W13, W14, W9]>>, - CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23, - X24, X25, X26, X27, X28, - X0, X1, X2, X3, X4, X5, - X6, X7, X10, X11, - X12, X13, X14, X9]>>, - - // Windows uses X15 for stack allocation - CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()", - CCIfType<[i32], CCAssignToReg<[W15]>>>, - CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()", - CCIfType<[i64], CCAssignToReg<[X15]>>>, - CCDelegateTo<CC_AArch64_AAPCS> + // VarArgs are only supported using the C calling convention. + // This handles the non-variadic parameter case. Variadic parameters + // are handled in CCAssignFnForCall. + CCIfVarArg<CCIfSubtarget<"isTargetDarwin()", CCDelegateTo<CC_AArch64_DarwinPCS>>>, + CCIfVarArg<CCIfSubtarget<"isTargetWindows()", CCDelegateTo<CC_AArch64_Win64PCS>>>, + CCIfVarArg<CCDelegateTo<CC_AArch64_AAPCS>>, + + // We can pass arguments in all general registers, except: + // - X8, used for sret + // - X16/X17, used by the linker as IP0/IP1 + // - X18, the platform register + // - X19, the base pointer + // - X29, the frame pointer + // - X30, the link register + // General registers are not preserved with the exception of + // FP, LR, and X18 + // Non-volatile registers are used first, so functions may call + // normal functions without saving and reloading arguments. + // X9 is assigned last as it is used in FrameLowering as the first + // choice for a scratch register. 
+ CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23, + W24, W25, W26, W27, W28, + W0, W1, W2, W3, W4, W5, + W6, W7, W10, W11, + W12, W13, W14, W9]>>, + CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23, + X24, X25, X26, X27, X28, + X0, X1, X2, X3, X4, X5, + X6, X7, X10, X11, + X12, X13, X14, X9]>>, + + // Windows uses X15 for stack allocation + CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()", + CCIfType<[i32], CCAssignToReg<[W15]>>>, + CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()", + CCIfType<[i64], CCAssignToReg<[X15]>>>, + + CCDelegateTo<CC_AArch64_AAPCS> ]>; // The order of the callee-saves in this file is important, because the diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index dac03bc..b1b83e2 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1714,7 +1714,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); bool EmitCFI = AFI->needsDwarfUnwindInfo(MF); bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF); @@ -1882,7 +1881,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (EmitCFI) { // Label used to tie together the PROLOG_LABEL and the MachineMoves. - MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + MCSymbol *FrameLabel = MF.getContext().createTempSymbol(); // Encode the stack size of the leaf function. 
unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes)); @@ -1901,8 +1900,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, return; } - bool IsWin64 = - Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); + bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; @@ -2308,8 +2306,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // How much of the stack used by incoming arguments this function is expected // to restore in this particular epilogue. int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB); - bool IsWin64 = - Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); + bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(), + MF.getFunction().isVarArg()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); int64_t AfterCSRPopSize = ArgumentStackToRestore; @@ -2615,8 +2613,8 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) { const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); - bool IsWin64 = - Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); + const Function &F = MF.getFunction(); + bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); @@ -2722,9 +2720,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( // via the frame pointer, so we have to use the FP in the parent // function. 
(void) Subtarget; - assert( - Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) && - "Funclets should only be present on Win64"); + assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(), + MF.getFunction().isVarArg()) && + "Funclets should only be present on Win64"); UseFP = true; } else { // We have the choice between FP and (SP or BP). diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 84de1ee..bf205b1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7109,7 +7109,13 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::GHC: return CC_AArch64_GHC; case CallingConv::PreserveNone: - return CC_AArch64_Preserve_None; + // The VarArg implementation makes assumptions about register + // argument passing that do not hold for preserve_none, so we + // instead fall back to C argument passing. + // The non-vararg case is handled in the CC function itself. 
+ if (!IsVarArg) + return CC_AArch64_Preserve_None; + [[fallthrough]]; case CallingConv::C: case CallingConv::Fast: case CallingConv::PreserveMost: @@ -7182,7 +7188,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( MachineFunction &MF = DAG.getMachineFunction(); const Function &F = MF.getFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv()); + bool IsWin64 = + Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()); bool StackViaX4 = CallConv == CallingConv::ARM64EC_Thunk_X64 || (isVarArg && Subtarget->isWindowsArm64EC()); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); @@ -7634,7 +7641,9 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); + Function &F = MF.getFunction(); + bool IsWin64 = + Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()); SmallVector<SDValue, 8> MemOps; @@ -7805,6 +7814,21 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { } } +/// Return true if the call convention supports varargs +/// Currently only those that pass varargs like the C +/// calling convention does are eligible +/// Calling conventions listed in this function must also +/// be properly handled in AArch64Subtarget::isCallingConvWin64 +static bool callConvSupportsVarArgs(CallingConv::ID CC) { + switch (CC) { + case CallingConv::C: + case CallingConv::PreserveNone: + return true; + default: + return false; + } +} + static void analyzeCallOperands(const AArch64TargetLowering &TLI, const AArch64Subtarget *Subtarget, const TargetLowering::CallLoweringInfo &CLI, @@ -7813,7 +7837,7 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI, CallingConv::ID CalleeCC = CLI.CallConv; bool IsVarArg = 
CLI.IsVarArg; const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC); + bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg); // For Arm64EC thunks, allocate 32 extra bytes at the bottom of the stack // for the shadow store. @@ -7941,8 +7965,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( // I want anyone implementing a new calling convention to think long and hard // about this assert. - assert((!IsVarArg || CalleeCC == CallingConv::C) && - "Unexpected variadic calling convention"); + if (IsVarArg && !callConvSupportsVarArgs(CalleeCC)) + report_fatal_error("Unsupported variadic calling convention"); LLVMContext &C = *DAG.getContext(); // Check that the call results are passed in the same way. @@ -10872,8 +10896,9 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + Function &F = MF.getFunction(); - if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv())) + if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg())) return LowerWin64_VASTART(Op, DAG); else if (Subtarget->isTargetDarwin()) return LowerDarwin_VASTART(Op, DAG); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 1e069f4..435cc18 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -611,7 +611,8 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF, MCRegister Reg) const { CallingConv::ID CC = MF.getFunction().getCallingConv(); const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); - bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv()); + bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv(), + MF.getFunction().isVarArg()); auto HasReg = 
[](ArrayRef<MCRegister> RegList, MCRegister Reg) { return llvm::is_contained(RegList, Reg); @@ -623,7 +624,9 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF, case CallingConv::GHC: return HasReg(CC_AArch64_GHC_ArgRegs, Reg); case CallingConv::PreserveNone: - return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg); + if (!MF.getFunction().isVarArg()) + return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg); + [[fallthrough]]; case CallingConv::C: case CallingConv::Fast: case CallingConv::PreserveMost: diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 4b840b2..12c3d25 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -322,13 +322,15 @@ public: std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override; - bool isCallingConvWin64(CallingConv::ID CC) const { + bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const { switch (CC) { case CallingConv::C: case CallingConv::Fast: case CallingConv::Swift: case CallingConv::SwiftTail: return isTargetWindows(); + case CallingConv::PreserveNone: + return IsVarArg && isTargetWindows(); case CallingConv::Win64: return true; default: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 5206ba4..b4d2a33 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -117,7 +117,9 @@ struct AArch64OutgoingValueAssigner CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, CCState &State) override { - bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv()); + const Function &F = State.getMachineFunction().getFunction(); + bool IsCalleeWin = + Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg()); bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg(); bool Res; @@ 
-557,8 +559,8 @@ void AArch64CallLowering::saveVarArgRegisters( MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); - bool IsWin64CC = - Subtarget.isCallingConvWin64(CCInfo.getCallingConv()); + bool IsWin64CC = Subtarget.isCallingConvWin64(CCInfo.getCallingConv(), + MF.getFunction().isVarArg()); const LLT p0 = LLT::pointer(0, 64); const LLT s64 = LLT::scalar(64); @@ -653,7 +655,9 @@ bool AArch64CallLowering::lowerFormalArguments( F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64) return false; - bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv()) && !Subtarget.isWindowsArm64EC(); + bool IsWin64 = + Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()) && + !Subtarget.isWindowsArm64EC(); SmallVector<ArgInfo, 8> SplitArgs; SmallVector<std::pair<Register, Register>> BoolArgs; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 0d3f6d9..009928a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2006,7 +2006,7 @@ bool AArch64InstructionSelector::selectVaStartDarwin( int FrameIdx = FuncInfo->getVarArgsStackIndex(); if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64( - MF.getFunction().getCallingConv())) { + MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) { FrameIdx = FuncInfo->getVarArgsGPRSize() > 0 ? 
FuncInfo->getVarArgsGPRIndex() : FuncInfo->getVarArgsStackIndex(); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index f05e5e6..97c5f96 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -386,24 +386,21 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, std::move(Emitter)); } -static MCStreamer *createMachOStreamer(MCContext &Ctx, - std::unique_ptr<MCAsmBackend> &&TAB, - std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter, - bool DWARFMustBeAtTheEnd) { +static MCStreamer * +createMachOStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, + std::unique_ptr<MCObjectWriter> &&OW, + std::unique_ptr<MCCodeEmitter> &&Emitter) { return createMachOStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), DWARFMustBeAtTheEnd, + std::move(Emitter), /*ignore=*/false, /*LabelSections*/ true); } static MCStreamer * createWinCOFFStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB, std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter, - bool IncrementalLinkerCompatible) { + std::unique_ptr<MCCodeEmitter> &&Emitter) { return createAArch64WinCOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), - IncrementalLinkerCompatible); + std::move(Emitter)); } namespace { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index c25cc2e..208d435 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -291,12 +291,11 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinCFISaveAnyRegQPX(unsigned Reg, emitARM64WinUnwindCode(Win64EH::UOP_SaveAnyRegQPX, Reg, Offset); } -MCWinCOFFStreamer 
*llvm::createAArch64WinCOFFStreamer( - MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, - std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter, - bool IncrementalLinkerCompatible) { - auto *S = new AArch64WinCOFFStreamer(Context, std::move(MAB), - std::move(Emitter), std::move(OW)); - S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); - return S; +MCWinCOFFStreamer * +llvm::createAArch64WinCOFFStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> MAB, + std::unique_ptr<MCObjectWriter> OW, + std::unique_ptr<MCCodeEmitter> Emitter) { + return new AArch64WinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), + std::move(OW)); } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h index a13b1a4..5caf520 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h @@ -20,8 +20,7 @@ namespace llvm { MCWinCOFFStreamer *createAArch64WinCOFFStreamer( MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, - std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter, - bool IncrementalLinkerCompatible); + std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter); } // end llvm namespace #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index da3e8c0..63048c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1870,6 +1870,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( VDataIn = MI.getOperand(1).getReg(); VDataTy = MRI->getType(VDataIn); NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32; + } else if (BaseOpcode->NoReturn) { + NumVDataDwords = 0; } else { VDataOut = MI.getOperand(0).getReg(); VDataTy = MRI->getType(VDataOut); @@ -3616,6 +3618,7 
@@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { return selectG_INSERT_VECTOR_ELT(I); case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16: + case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: { const AMDGPU::ImageDimIntrinsicInfo *Intr = diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 88e40da..89ef0f2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -6334,8 +6334,13 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( const LLT V2S16 = LLT::fixed_vector(2, 16); unsigned DMask = 0; - Register VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg(); - LLT Ty = MRI->getType(VData); + Register VData; + LLT Ty; + + if (!BaseOpcode->NoReturn || BaseOpcode->Store) { + VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg(); + Ty = MRI->getType(VData); + } const bool IsAtomicPacked16Bit = (BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 || @@ -6373,7 +6378,11 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( : AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE; const unsigned LoadOpcode = IsD16 ? AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 : AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD; - unsigned NewOpcode = NumDefs == 0 ? StoreOpcode : LoadOpcode; + unsigned NewOpcode = LoadOpcode; + if (BaseOpcode->Store) + NewOpcode = StoreOpcode; + else if (BaseOpcode->NoReturn) + NewOpcode = AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET; // Track that we legalized this MI.setDesc(B.getTII().get(NewOpcode)); @@ -6503,7 +6512,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( Flags |= 2; MI.addOperand(MachineOperand::CreateImm(Flags)); - if (BaseOpcode->Store) { // No TFE for stores? + if (BaseOpcode->NoReturn) { // No TFE for stores? 
// TODO: Handle dmask trim if (!Ty.isVector() || !IsD16) return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 73796ed..68f4767 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3172,6 +3172,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( } case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16: + case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: { const AMDGPU::RsrcIntrinsic *RSrcIntrin = @@ -4842,6 +4843,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16: + case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: { auto IntrID = AMDGPU::getIntrinsicID(MI); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 217487b..92c3b26c 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3868,7 +3868,8 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); - assert(VDataIdx != -1); + if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample + return true; if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray return true; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 15fd36e..b4e58cf 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -51,6 +51,7 @@ class MIMGBaseOpcode : 
PredicateControl { bit MSAA = 0; bit BVH = 0; bit A16 = 0; + bit NoReturn = 0; } def MIMGBaseOpcode : GenericEnum { @@ -62,7 +63,7 @@ def MIMGBaseOpcodesTable : GenericTable { let CppTypeName = "MIMGBaseOpcodeInfo"; let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates", - "LodOrClampOrMip", "HasD16", "MSAA", "BVH", "A16"]; + "LodOrClampOrMip", "HasD16", "MSAA", "BVH", "A16", "NoReturn"]; string TypeOf_BaseOpcode = "MIMGBaseOpcode"; let PrimaryKey = ["BaseOpcode"]; @@ -521,6 +522,25 @@ class VSAMPLE_Sampler_gfx12<mimgopc op, string opcode, RegisterClass DataRC, #!if(BaseOpcode.HasD16, "$d16", ""); } +class VSAMPLE_Sampler_nortn_gfx12<mimgopc op, string opcode, + int num_addrs, RegisterClass Addr3RC = VGPR_32, + string dns=""> + : VSAMPLE_gfx12<op.GFX12, (outs), num_addrs, dns, Addr3RC> { + let InOperandList = !con(AddrIns, + (ins SReg_256:$rsrc), + !if(BaseOpcode.Sampler, (ins SReg_128:$samp), (ins)), + (ins DMask:$dmask, Dim:$dim, UNorm:$unorm, + CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe, + LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" off, "#AddrAsm#", $rsrc" + #!if(BaseOpcode.Sampler, ", $samp", "") + #"$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); + // Force vdata to VGPR0 as no result will be returned. 
+ let vdata = 0; +} + multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm, RegisterClass dst_rc, bit enableDisasm, bit ExtendedImageInst = 1, @@ -835,6 +855,7 @@ multiclass MIMG_Store <mimgopc op, string asm, bit has_d16, bit mip = 0> { let Store = 1; let LodOrClampOrMip = mip; let HasD16 = has_d16; + let NoReturn = 1; } let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in { @@ -1136,44 +1157,62 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc, #!if(BaseOpcode.HasD16, "$d16", ""); } +class MIMG_Sampler_OpList_gfx10p<dag OpPrefix, bit HasD16> { + dag ret = !con(OpPrefix, + (ins SReg_256:$srsrc, SReg_128:$ssamp, + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe), + !if(HasD16, (ins D16:$d16), (ins))); +} + +class MIMG_Sampler_Asm_gfx10p<string opcode, string AsmPrefix, bit HasD16> { + string ret = opcode#" "#AsmPrefix#", $srsrc, $ssamp$dmask$dim$unorm" + #"$cpol$r128$a16$tfe$lwe" + #!if(HasD16, "$d16", ""); +} + class MIMG_Sampler_gfx10<mimgopc op, string opcode, RegisterClass DataRC, RegisterClass AddrRC, string dns=""> : MIMG_gfx10<op.GFX10M, (outs DataRC:$vdata), dns> { - let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp, - DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, - R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe), - !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm" - #"$cpol$r128$a16$tfe$lwe" - #!if(BaseOpcode.HasD16, "$d16", ""); + let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; + let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "$vdata, $vaddr0", BaseOpcode.HasD16>.ret; } class MIMG_Sampler_nsa_gfx10<mimgopc op, string opcode, RegisterClass DataRC, int num_addrs, string dns=""> : MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdata), num_addrs, dns> { - let InOperandList = !con(AddrIns, - (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, - 
Dim:$dim, UNorm:$unorm, CPol:$cpol, - R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe), - !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm" - #"$cpol$r128$a16$tfe$lwe" - #!if(BaseOpcode.HasD16, "$d16", ""); + let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16>.ret; + let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, " $vdata, "#AddrAsm, BaseOpcode.HasD16>.ret; +} + +class MIMG_Sampler_nortn_gfx10<mimgopc op, string opcode, + RegisterClass AddrRC, + string dns=""> + : MIMG_gfx10<op.GFX10M, (outs), dns> { + let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; + let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "off, $vaddr0", BaseOpcode.HasD16>.ret; + // Force vdata to VGPR0 as no result will be returned. + let vdata = 0; +} + +class MIMG_Sampler_nortn_nsa_gfx10<mimgopc op, string opcode, + int num_addrs, + string dns=""> + : MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, dns> { + let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16>.ret; + let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, " off, "#AddrAsm, BaseOpcode.HasD16>.ret; + // Force vdata to VGPR0 as no result will be returned. 
+ let vdata = 0; } class MIMG_Sampler_gfx11<mimgopc op, string opcode, RegisterClass DataRC, RegisterClass AddrRC, string dns=""> : MIMG_gfx11<op.GFX11, (outs DataRC:$vdata), dns> { - let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp, - DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, - R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe), - !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm" - #"$cpol$r128$a16$tfe$lwe" - #!if(BaseOpcode.HasD16, "$d16", ""); + let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; + let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "$vdata, $vaddr0", BaseOpcode.HasD16>.ret; } class MIMG_Sampler_nsa_gfx11<mimgopc op, string opcode, @@ -1181,14 +1220,26 @@ class MIMG_Sampler_nsa_gfx11<mimgopc op, string opcode, RegisterClass LastVAddrSize, string dns=""> : MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns, [], LastVAddrSize> { - let InOperandList = !con(AddrIns, - (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, - Dim:$dim, UNorm:$unorm, CPol:$cpol, - R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe), - !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm" - #"$cpol$r128$a16$tfe$lwe" - #!if(BaseOpcode.HasD16, "$d16", ""); + let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16>.ret; + let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, " $vdata, "#AddrAsm, BaseOpcode.HasD16>.ret; +} + +class MIMG_Sampler_nortn_gfx11<mimgopc op, string opcode, + RegisterClass AddrRC, + string dns=""> + : MIMG_gfx11<op.GFX11, (outs), dns> { + let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; + let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "off, $vaddr0", BaseOpcode.HasD16>.ret; + let vdata = 0; +} + +class MIMG_Sampler_nortn_nsa_gfx11<mimgopc op, string opcode, + int num_addrs, + 
RegisterClass LastVAddrSize, string dns=""> + : MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, dns, [], LastVAddrSize> { + let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16>.ret; + let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "off, "#AddrAsm, BaseOpcode.HasD16>.ret; + let vdata = 0; } class MIMGAddrSize<int dw, bit enable_disasm, int AddrDW = dw> { @@ -1366,6 +1417,57 @@ class MIMG_Sampler_BaseOpcode<AMDGPUSampleVariant sample> let LodOrClampOrMip = !ne(sample.LodOrClamp, ""); } +multiclass MIMG_Sampler_NoReturn <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0, bit isG16, string asm> { + def "" : MIMG_Sampler_BaseOpcode<sample> { + let HasD16 = 1; + let G16 = isG16; + let NoReturn = 1; + } + + let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm, + mayLoad = 1, mayStore = 1, VDataDwords = 0 in { + foreach addr = MIMG_Sampler_AddrSizes<sample, isG16>.MachineInstrs in { + let VAddrDwords = addr.NumWords in { + if op.HAS_GFX10M then { + def _V # addr.NumWords # _gfx10 + : MIMG_Sampler_nortn_gfx10 <op, asm, addr.RegClass>; + } + if op.HAS_GFX11 then { + def _V # addr.NumWords # _gfx11 + : MIMG_Sampler_nortn_gfx11 <op, asm, addr.RegClass>; + } + } + } + + foreach addr = MIMG_Sampler_AddrSizes<sample, isG16>.NSAInstrs in { + let VAddrDwords = addr.NumWords in { + if op.HAS_GFX10M then { + def _V # addr.NumWords # _nsa_gfx10 + : MIMG_Sampler_nortn_nsa_gfx10<op, asm, addr.NumWords>; + } + } + } + + foreach addr = MIMG_Sampler_AddrSizes<sample, isG16, 5/*MaxNSASize*/>.PartialNSAInstrs in { + let VAddrDwords = addr.NumWords in { + if op.HAS_GFX11 then { + def _V # addr.NumWords # _nsa_gfx11 + : MIMG_Sampler_nortn_nsa_gfx11<op, asm, addr.NumWords, addr.RegClass>; + } + } + } + + foreach addr = MIMG_Sampler_AddrSizes<sample, isG16, 4/*MaxNSASize*/, 1>.PartialNSAInstrs in { + let VAddrDwords = addr.NumWords in { + if op.HAS_GFX12 then { + def _V # addr.NumWords # _gfx12 + : VSAMPLE_Sampler_nortn_gfx12<op, asm, addr.NumWords, addr.RegClass>; + } + 
} + } + } +} + multiclass MIMG_Sampler <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0, bit isG16 = 0, bit isGetLod = 0, string asm = "image_sample"#sample.LowerCaseMod#!if(isG16, "_g16", ""), @@ -1388,6 +1490,9 @@ multiclass MIMG_Sampler <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0, let VDataDwords = 5 in defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160, 0, ExtendedImageInst, isG16>; } + + if !not(isGetLod) then + defm "_nortn" : MIMG_Sampler_NoReturn <op, sample, wqm, isG16, asm>; } multiclass MIMG_Sampler_WQM <mimgopc op, AMDGPUSampleVariant sample> @@ -1755,6 +1860,10 @@ def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>; def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>; def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>; def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>; +def : MIMGLZMapping<IMAGE_SAMPLE_L_nortn, IMAGE_SAMPLE_LZ_nortn>; +def : MIMGLZMapping<IMAGE_SAMPLE_C_L_nortn, IMAGE_SAMPLE_C_LZ_nortn>; +def : MIMGLZMapping<IMAGE_SAMPLE_L_O_nortn, IMAGE_SAMPLE_LZ_O_nortn>; +def : MIMGLZMapping<IMAGE_SAMPLE_C_L_O_nortn, IMAGE_SAMPLE_C_LZ_O_nortn>; // MIP to NONMIP Optimization Mapping def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>; @@ -1777,6 +1886,14 @@ def : MIMGBiasMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_O>; def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>; def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>; def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>; +def : MIMGBiasMapping<IMAGE_SAMPLE_B_nortn, IMAGE_SAMPLE_nortn>; +def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_nortn, IMAGE_SAMPLE_CL_nortn>; +def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_nortn, IMAGE_SAMPLE_C_nortn>; +def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_nortn, IMAGE_SAMPLE_C_CL_nortn>; +def : MIMGBiasMapping<IMAGE_SAMPLE_B_O_nortn, IMAGE_SAMPLE_O_nortn>; +def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O_nortn, IMAGE_SAMPLE_CL_O_nortn>; +def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O_nortn, 
IMAGE_SAMPLE_C_O_nortn>; +def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O_nortn, IMAGE_SAMPLE_C_CL_O_nortn>; // Offset to NoOffset Optimization Mapping def : MIMGOffsetMapping<IMAGE_SAMPLE_O, IMAGE_SAMPLE>; @@ -1819,6 +1936,34 @@ def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O_G16, IMAGE_SAMPLE_CD_G16>; def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O_G16, IMAGE_SAMPLE_CD_CL_G16>; def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O_G16, IMAGE_SAMPLE_C_CD_G16>; def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O_G16, IMAGE_SAMPLE_C_CD_CL_G16>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_O_nortn, IMAGE_SAMPLE_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_CL_O_nortn, IMAGE_SAMPLE_CL_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_D_O_nortn, IMAGE_SAMPLE_D_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_D_CL_O_nortn, IMAGE_SAMPLE_D_CL_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_D_O_G16_nortn, IMAGE_SAMPLE_D_G16_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_D_CL_O_G16_nortn, IMAGE_SAMPLE_D_CL_G16_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_L_O_nortn, IMAGE_SAMPLE_L_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_B_O_nortn, IMAGE_SAMPLE_B_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_B_CL_O_nortn, IMAGE_SAMPLE_B_CL_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_LZ_O_nortn, IMAGE_SAMPLE_LZ_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_O_nortn, IMAGE_SAMPLE_C_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CL_O_nortn, IMAGE_SAMPLE_C_CL_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_O_nortn, IMAGE_SAMPLE_C_D_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_CL_O_nortn, IMAGE_SAMPLE_C_D_CL_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_O_G16_nortn, IMAGE_SAMPLE_C_D_G16_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_CL_O_G16_nortn, IMAGE_SAMPLE_C_D_CL_G16_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_L_O_nortn, IMAGE_SAMPLE_C_L_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_B_CL_O_nortn, IMAGE_SAMPLE_C_B_CL_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_B_O_nortn, IMAGE_SAMPLE_C_B_nortn>; +def : 
MIMGOffsetMapping<IMAGE_SAMPLE_C_LZ_O_nortn, IMAGE_SAMPLE_C_LZ_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O_nortn, IMAGE_SAMPLE_CD>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O_nortn, IMAGE_SAMPLE_CD_CL_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O_nortn, IMAGE_SAMPLE_C_CD_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O_nortn, IMAGE_SAMPLE_C_CD_CL_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O_G16_nortn, IMAGE_SAMPLE_CD_G16_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O_G16_nortn, IMAGE_SAMPLE_CD_CL_G16_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O_G16_nortn, IMAGE_SAMPLE_C_CD_G16_nortn>; +def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O_G16_nortn, IMAGE_SAMPLE_C_CD_CL_G16_nortn>; // G to G16 Optimization Mapping def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>; @@ -1837,3 +1982,19 @@ def : MIMGG16Mapping<IMAGE_SAMPLE_CD_O, IMAGE_SAMPLE_CD_O_G16>; def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_O, IMAGE_SAMPLE_CD_CL_O_G16>; def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_O, IMAGE_SAMPLE_C_CD_O_G16>; def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_O, IMAGE_SAMPLE_C_CD_CL_O_G16>; +def : MIMGG16Mapping<IMAGE_SAMPLE_D_nortn, IMAGE_SAMPLE_D_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_nortn, IMAGE_SAMPLE_D_CL_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_nortn, IMAGE_SAMPLE_C_D_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL_nortn, IMAGE_SAMPLE_C_D_CL_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_D_O_nortn, IMAGE_SAMPLE_D_O_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_O_nortn, IMAGE_SAMPLE_D_CL_O_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_O_nortn, IMAGE_SAMPLE_C_D_O_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL_O_nortn, IMAGE_SAMPLE_C_D_CL_O_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_CD_nortn, IMAGE_SAMPLE_CD_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_nortn, IMAGE_SAMPLE_CD_CL_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_nortn, IMAGE_SAMPLE_C_CD_G16_nortn>; +def : 
MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_nortn, IMAGE_SAMPLE_C_CD_CL_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_CD_O_nortn, IMAGE_SAMPLE_CD_O_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_O_nortn, IMAGE_SAMPLE_CD_CL_O_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_O_nortn, IMAGE_SAMPLE_C_CD_O_G16_nortn>; +def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_O_nortn, IMAGE_SAMPLE_C_CD_CL_O_G16_nortn>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d5ffb44..a09e0ad 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1190,8 +1190,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // TODO: Should images get their own address space? Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE; - if (RsrcIntr->IsImage) + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = nullptr; + if (RsrcIntr->IsImage) { + const AMDGPU::ImageDimIntrinsicInfo *Intr = + AMDGPU::getImageDimIntrinsicInfo(IntrID); + BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); Info.align.reset(); + } Value *RsrcArg = CI.getArgOperand(RsrcIntr->RsrcArg); if (auto *RsrcPtrTy = dyn_cast<PointerType>(RsrcArg->getType())) { @@ -1212,11 +1217,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, if (RsrcIntr->IsImage) { unsigned MaxNumLanes = 4; - const AMDGPU::ImageDimIntrinsicInfo *Intr - = AMDGPU::getImageDimIntrinsicInfo(IntrID); - const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = - AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); - if (!BaseOpcode->Gather4) { // If this isn't a gather, we may have excess loaded elements in the // IR type. Check the dmask for the real number of elements loaded. @@ -1250,7 +1250,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.flags |= MachineMemOperand::MOStore; } else { - // Atomic + // Atomic or NoReturn Sampler Info.opc = CI.getType()->isVoidTy() ? 
ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; Info.flags |= MachineMemOperand::MOLoad | @@ -1259,9 +1259,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, switch (IntrID) { default: - Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); - // XXX - Should this be volatile without known ordering? - Info.flags |= MachineMemOperand::MOVolatile; + if (RsrcIntr->IsImage && BaseOpcode->NoReturn) { + // Fake memory access type for no return sampler intrinsics + Info.memVT = MVT::i32; + } else { + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); + } break; case Intrinsic::amdgcn_raw_buffer_load_lds: case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: @@ -7900,7 +7905,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, bool IsG16 = false; bool IsA16 = false; SDValue VData; - int NumVDataDwords; + int NumVDataDwords = 0; bool AdjustRetType = false; bool IsAtomicPacked16Bit = false; @@ -7949,7 +7954,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, } NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32; - } else { + } else if (!BaseOpcode->NoReturn) { // Work out the num dwords based on the dmask popcount and underlying type // and whether packing is supported. 
MVT LoadVT = ResultTypes[0].getSimpleVT(); @@ -8242,7 +8247,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1); return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL); } - if (BaseOpcode->Store) + if (BaseOpcode->NoReturn) return SDValue(NewNode, 0); return constructRetValue(DAG, NewNode, OrigResultTypes, IsTexFail, Subtarget->hasUnpackedD16VMem(), IsD16, DMaskLanes, diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index a18da72..1315aa0 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -699,7 +699,8 @@ public: // these should use VM_CNT. if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst)) return VMEM_ACCESS; - if (Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst)) { + if (Inst.mayStore() && + (!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(Inst))) { // FLAT and SCRATCH instructions may access scratch. Other VMEM // instructions do not. if (SIInstrInfo::isFLAT(Inst) && mayAccessScratchThroughFlat(Inst)) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 5204479..463737f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -472,6 +472,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth( Offset = 0; // Get appropriate operand, and compute width accordingly. 
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); + if (DataOpIdx == -1) + return false; // no return sampler Width = getOpSize(LdSt, DataOpIdx); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 2e617e5..15078bc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3953,6 +3953,14 @@ def G_AMDGPU_INTRIN_IMAGE_LOAD_D16 : AMDGPUGenericInstruction { let mayStore = 1; } +def G_AMDGPU_INTRIN_IMAGE_LOAD_NORET : AMDGPUGenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins unknown:$intrin, variable_ops); + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 1; +} + // This is equivalent to the G_INTRINSIC*, but the operands may have // been legalized depending on the subtarget requirements. def G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPUGenericInstruction { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index af2f0bc..429c3ad 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -404,6 +404,7 @@ struct MIMGBaseOpcodeInfo { bool MSAA; bool BVH; bool A16; + bool NoReturn; }; LLVM_READONLY diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 20603b6..cf4fc37 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -369,10 +369,9 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, static MCStreamer * createARMMachOStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&MAB, std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter, - bool DWARFMustBeAtTheEnd) { + std::unique_ptr<MCCodeEmitter> &&Emitter) { return createMachOStreamer(Ctx, std::move(MAB), std::move(OW), - std::move(Emitter), DWARFMustBeAtTheEnd); + 
std::move(Emitter), false); } static MCInstPrinter *createARMMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index a673d59..66f1923 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -93,8 +93,7 @@ MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCSubtargetInfo &STI, MCStreamer *createARMWinCOFFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> &&MAB, std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter, - bool IncrementalLinkerCompatible); + std::unique_ptr<MCCodeEmitter> &&Emitter); /// Construct an ELF Mach-O object writer. std::unique_ptr<MCObjectTargetWriter> createARMELFObjectWriter(uint8_t OSABI); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index 0fcf6eb..e66059c 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -70,12 +70,9 @@ MCStreamer * llvm::createARMWinCOFFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> &&MAB, std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter, - bool IncrementalLinkerCompatible) { - auto *S = new ARMWinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), - std::move(OW)); - S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); - return S; + std::unique_ptr<MCCodeEmitter> &&Emitter) { + return new ARMWinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), + std::move(OW)); } namespace { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 6072e5e..ba6be85 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -335,7 +335,6 @@ 
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SETCC); // Set DAG combine for 'LSX' feature. @@ -2529,165 +2528,6 @@ Retry2: return SDValue(); } -static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) { - ExtType = ISD::NON_EXTLOAD; - - switch (V.getNode()->getOpcode()) { - case ISD::LOAD: { - LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode()); - if ((LoadNode->getMemoryVT() == MVT::i8) || - (LoadNode->getMemoryVT() == MVT::i16)) { - ExtType = LoadNode->getExtensionType(); - return true; - } - return false; - } - case ISD::AssertSext: { - VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1)); - if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { - ExtType = ISD::SEXTLOAD; - return true; - } - return false; - } - case ISD::AssertZext: { - VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1)); - if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { - ExtType = ISD::ZEXTLOAD; - return true; - } - return false; - } - default: - return false; - } - - return false; -} - -// Eliminate redundant truncation and zero-extension nodes. 
-// * Case 1: -// +------------+ +------------+ +------------+ -// | Input1 | | Input2 | | CC | -// +------------+ +------------+ +------------+ -// | | | -// V V +----+ -// +------------+ +------------+ | -// | TRUNCATE | | TRUNCATE | | -// +------------+ +------------+ | -// | | | -// V V | -// +------------+ +------------+ | -// | ZERO_EXT | | ZERO_EXT | | -// +------------+ +------------+ | -// | | | -// | +-------------+ | -// V V | | -// +----------------+ | | -// | AND | | | -// +----------------+ | | -// | | | -// +---------------+ | | -// | | | -// V V V -// +-------------+ -// | CMP | -// +-------------+ -// * Case 2: -// +------------+ +------------+ +-------------+ +------------+ +------------+ -// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC | -// +------------+ +------------+ +-------------+ +------------+ +------------+ -// | | | | | -// V | | | | -// +------------+ | | | | -// | XOR |<---------------------+ | | -// +------------+ | | | -// | | | | -// V V +---------------+ | -// +------------+ +------------+ | | -// | TRUNCATE | | TRUNCATE | | +-------------------------+ -// +------------+ +------------+ | | -// | | | | -// V V | | -// +------------+ +------------+ | | -// | ZERO_EXT | | ZERO_EXT | | | -// +------------+ +------------+ | | -// | | | | -// V V | | -// +----------------+ | | -// | AND | | | -// +----------------+ | | -// | | | -// +---------------+ | | -// | | | -// V V V -// +-------------+ -// | CMP | -// +-------------+ -static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const LoongArchSubtarget &Subtarget) { - ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); - - SDNode *AndNode = N->getOperand(0).getNode(); - if (AndNode->getOpcode() != ISD::AND) - return SDValue(); - - SDValue AndInputValue2 = AndNode->getOperand(1); - if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND) - return SDValue(); - - SDValue CmpInputValue = N->getOperand(1); - SDValue 
AndInputValue1 = AndNode->getOperand(0); - if (AndInputValue1.getOpcode() == ISD::XOR) { - if (CC != ISD::SETEQ && CC != ISD::SETNE) - return SDValue(); - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1)); - if (!CN || CN->getSExtValue() != -1) - return SDValue(); - CN = dyn_cast<ConstantSDNode>(CmpInputValue); - if (!CN || CN->getSExtValue() != 0) - return SDValue(); - AndInputValue1 = AndInputValue1.getOperand(0); - if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND) - return SDValue(); - } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) { - if (AndInputValue2 != CmpInputValue) - return SDValue(); - } else { - return SDValue(); - } - - SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0); - if (TruncValue1.getOpcode() != ISD::TRUNCATE) - return SDValue(); - - SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0); - if (TruncValue2.getOpcode() != ISD::TRUNCATE) - return SDValue(); - - SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0); - SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0); - ISD::LoadExtType ExtType1; - ISD::LoadExtType ExtType2; - - if (!checkValueWidth(TruncInputValue1, ExtType1) || - !checkValueWidth(TruncInputValue2, ExtType2)) - return SDValue(); - - if ((ExtType2 != ISD::ZEXTLOAD) && - ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD))) - return SDValue(); - - // These truncation and zero-extension nodes are not necessary, remove them. - SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0), - TruncInputValue1, TruncInputValue2); - SDValue NewSetCC = - DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC); - DAG.ReplaceAllUsesWith(N, NewSetCC.getNode()); - return SDValue(N, 0); -} - // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. 
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -3315,8 +3155,6 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: return performORCombine(N, DAG, DCI, Subtarget); - case ISD::SETCC: - return performSETCCCombine(N, DAG, DCI, Subtarget); case ISD::SRL: return performSRLCombine(N, DAG, DCI, Subtarget); case LoongArchISD::BITREV_W: diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.cpp b/llvm/lib/Target/M68k/M68kFrameLowering.cpp index 36443f9..c548346 100644 --- a/llvm/lib/Target/M68k/M68kFrameLowering.cpp +++ b/llvm/lib/Target/M68k/M68kFrameLowering.cpp @@ -246,9 +246,7 @@ MachineBasicBlock::iterator M68kFrameLowering::eliminateCallFramePseudoInstr( unsigned StackAlign = getStackAlignment(); Amount = alignTo(Amount, StackAlign); - MachineModuleInfo &MMI = MF.getMMI(); - const auto &Fn = MF.getFunction(); - bool DwarfCFI = MMI.hasDebugInfo() || Fn.needsUnwindTableEntry(); + bool DwarfCFI = MF.needsFrameMoves(); // If we have any exception handlers in this function, and we adjust // the SP before calls, we may need to indicate this to the unwinder diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 11193c1..8516bc1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -3752,8 +3752,14 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp( Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); else Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), - mvt); + + // Offset must fit in a 32-bit signed int in PTX [register+offset] address + // mode + if (!CN->getAPIntValue().isSignedIntN(32)) + return false; + + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(OpNode), + MVT::i32); return true; } } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp 
b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index bc23998..44c1a2e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -5167,9 +5167,12 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, // - [areg+immoff] // - [immAddr] - if (AM.BaseGV) { + // immoff must fit in a signed 32-bit int + if (!APInt(64, AM.BaseOffs).isSignedIntN(32)) + return false; + + if (AM.BaseGV) return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; - } switch (AM.Scale) { case 0: // "r", "r+i" or "i" is allowed diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp index 74ebaa9..2f302ed 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp @@ -49,15 +49,6 @@ createSPIRVMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { return createSPIRVMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } -static MCStreamer * -createSPIRVMCStreamer(const Triple &T, MCContext &Ctx, - std::unique_ptr<MCAsmBackend> &&MAB, - std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&Emitter) { - return createSPIRVStreamer(Ctx, std::move(MAB), std::move(OW), - std::move(Emitter)); -} - static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &, MCInstPrinter *, bool) { @@ -94,7 +85,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTargetMC() { TargetRegistry::RegisterMCInstrInfo(*T, createSPIRVMCInstrInfo); TargetRegistry::RegisterMCRegInfo(*T, createSPIRVMCRegisterInfo); TargetRegistry::RegisterMCSubtargetInfo(*T, createSPIRVMCSubtargetInfo); - TargetRegistry::RegisterSPIRVStreamer(*T, createSPIRVMCStreamer); TargetRegistry::RegisterMCInstPrinter(*T, createSPIRVMCInstPrinter); TargetRegistry::RegisterMCInstrAnalysis(*T, createSPIRVInstrAnalysis); TargetRegistry::RegisterMCCodeEmitter(*T, createSPIRVMCCodeEmitter); 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 3cd0af0c..6553e1c 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -691,9 +691,12 @@ void X86MCCodeEmitter::emitMemModRMByte( unsigned BaseRegNo = BaseReg ? getX86RegNum(Base) : -1U; + bool IsAdSize16 = STI.hasFeature(X86::Is32Bit) && + (TSFlags & X86II::AdSizeMask) == X86II::AdSize16; + // 16-bit addressing forms of the ModR/M byte have a different encoding for // the R/M field and are far more limited in which registers can be used. - if (X86_MC::is16BitMemOperand(MI, Op, STI)) { + if (IsAdSize16 || X86_MC::is16BitMemOperand(MI, Op, STI)) { if (BaseReg) { // For 32-bit addressing, the row and column values in Table 2-2 are // basically the same. It's AX/CX/DX/BX/SP/BP/SI/DI in that order, with diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 96a7823..4e83e7e4 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -128,8 +128,7 @@ MCTargetStreamer *createX86ObjectTargetStreamer(MCStreamer &S, MCStreamer *createX86WinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> &&AB, std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&CE, - bool IncrementalLinkerCompatible); + std::unique_ptr<MCCodeEmitter> &&CE); MCStreamer *createX86ELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr<MCAsmBackend> &&MAB, diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index b1e5362..1ef10928 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -72,14 +72,9 @@ void X86WinCOFFStreamer::finishImpl() { } } // namespace -MCStreamer 
*llvm::createX86WinCOFFStreamer(MCContext &C, - std::unique_ptr<MCAsmBackend> &&AB, - std::unique_ptr<MCObjectWriter> &&OW, - std::unique_ptr<MCCodeEmitter> &&CE, - bool IncrementalLinkerCompatible) { - X86WinCOFFStreamer *S = - new X86WinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)); - S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); - return S; +MCStreamer * +llvm::createX86WinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> &&AB, + std::unique_ptr<MCObjectWriter> &&OW, + std::unique_ptr<MCCodeEmitter> &&CE) { + return new X86WinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)); } - diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 3395a13..0c2c6bf 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -66,11 +66,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { CodeEmitter.reset(TM.getTarget().createMCCodeEmitter( *Subtarget->getInstrInfo(), MF.getContext())); - EmitFPOData = - Subtarget->isTargetWin32() && MF.getMMI().getModule()->getCodeViewFlag(); + const Module *M = MF.getFunction().getParent(); + EmitFPOData = Subtarget->isTargetWin32() && M->getCodeViewFlag(); - IndCSPrefix = - MF.getMMI().getModule()->getModuleFlag("indirect_branch_cs_prefix"); + IndCSPrefix = M->getModuleFlag("indirect_branch_cs_prefix"); SetupMachineFunction(MF); diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 8980178..0ff50d8 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1530,7 +1530,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo &MFI = MF.getFrameInfo(); const Function &Fn = MF.getFunction(); - MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); uint64_t MaxAlign = 
calculateMaxStackAlign(MF); // Desired stack alignment. uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. @@ -1545,8 +1544,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, bool IsWin64Prologue = isWin64Prologue(MF); bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); // FIXME: Emit FPO data for EH funclets. - bool NeedsWinFPO = - !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); + bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() && + MF.getFunction().getParent()->getCodeViewFlag(); bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; bool NeedsDwarfCFI = needsDwarfCFI(MF); Register FramePtr = TRI->getFrameRegister(MF); @@ -3521,7 +3520,7 @@ void X86FrameLowering::adjustForHiPEPrologue( // HiPE-specific values NamedMDNode *HiPELiteralsMD = - MF.getMMI().getModule()->getNamedMetadata("hipe.literals"); + MF.getFunction().getParent()->getNamedMetadata("hipe.literals"); if (!HiPELiteralsMD) report_fatal_error( "Can't generate HiPE prologue without runtime parameters"); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index c91bd57..74804e5 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -922,7 +922,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (Imm == EndbrImm || isEndbrImm64(Imm)) { // Check that the cf-protection-branch is enabled. 
Metadata *CFProtectionBranch = - MF->getMMI().getModule()->getModuleFlag("cf-protection-branch"); + MF->getFunction().getParent()->getModuleFlag( + "cf-protection-branch"); if (CFProtectionBranch || IndirectBranchTracking) { SDLoc dl(N); SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 890728b..2959902c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35882,7 +35882,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MIB.addMBB(restoreMBB); MIB.setMemRefs(MMOs); - if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) { + if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) { emitSetJmpShadowStackFix(MI, thisMBB); } @@ -36158,7 +36158,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *thisMBB = MBB; // When CET and shadow stack is enabled, we need to fix the Shadow Stack. 
- if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) { + if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) { thisMBB = emitLongJmpShadowStackFix(MI, thisMBB); } @@ -57981,7 +57981,7 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const { - const Module *M = DAG.getMachineFunction().getMMI().getModule(); + const Module *M = DAG.getMachineFunction().getFunction().getParent(); Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); if (IsCFProtectionSupported) { // In case control-flow branch protection is enabled, we need to add diff --git a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp index 381286a..7740a17 100644 --- a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp +++ b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp @@ -116,7 +116,7 @@ static bool needsPrologueENDBR(MachineFunction &MF, const Module *M) { bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) { const X86Subtarget &SubTarget = MF.getSubtarget<X86Subtarget>(); - const Module *M = MF.getMMI().getModule(); + const Module *M = MF.getFunction().getParent(); // Check that the cf-protection-branch is enabled. 
Metadata *isCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index df20ecd..77ddd23 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -147,7 +147,7 @@ X86MCInstLower::X86MCInstLower(const MachineFunction &mf, AsmPrinter(asmprinter) {} MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { - return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>(); + return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); } /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol @@ -203,7 +203,7 @@ MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { break; case X86II::MO_COFFSTUB: { MachineModuleInfoCOFF &MMICOFF = - MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>(); + AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>(); MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym); if (!StubSym.getPointer()) { assert(MO.isGlobal() && "Extern symbol not handled yet"); diff --git a/llvm/lib/Target/X86/X86ReturnThunks.cpp b/llvm/lib/Target/X86/X86ReturnThunks.cpp index fe89238..c40b4f3 100644 --- a/llvm/lib/Target/X86/X86ReturnThunks.cpp +++ b/llvm/lib/Target/X86/X86ReturnThunks.cpp @@ -78,7 +78,7 @@ bool X86ReturnThunks::runOnMachineFunction(MachineFunction &MF) { Rets.push_back(&Term); bool IndCS = - MF.getMMI().getModule()->getModuleFlag("indirect_branch_cs_prefix"); + MF.getFunction().getParent()->getModuleFlag("indirect_branch_cs_prefix"); const MCInstrDesc &CS = ST.getInstrInfo()->get(X86::CS_PREFIX); const MCInstrDesc &JMP = ST.getInstrInfo()->get(X86::TAILJMPd); diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 141ecb9..dcf9130 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -171,14 +171,14 @@ constexpr FeatureBitset 
FeaturesClearwaterforest = // Geode Processor. constexpr FeatureBitset FeaturesGeode = - FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA; + FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeaturePRFCHW; // K6 processor. constexpr FeatureBitset FeaturesK6 = FeatureX87 | FeatureCMPXCHG8B | FeatureMMX; // K7 and K8 architecture processors. constexpr FeatureBitset FeaturesAthlon = - FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA; + FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeaturePRFCHW; constexpr FeatureBitset FeaturesAthlonXP = FeaturesAthlon | FeatureFXSR | FeatureSSE; constexpr FeatureBitset FeaturesK8 = @@ -256,8 +256,8 @@ constexpr ProcInfo Processors[] = { // i486-generation processors. { {"i486"}, CK_i486, ~0U, FeatureX87, '\0', false }, { {"winchip-c6"}, CK_WinChipC6, ~0U, FeaturesPentiumMMX, '\0', false }, - { {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | Feature3DNOW, '\0', false }, - { {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | Feature3DNOW, '\0', false }, + { {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | FeaturePRFCHW, '\0', false }, + { {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | FeaturePRFCHW, '\0', false }, // i586-generation processors, P5 microarchitecture based. { {"i586"}, CK_i586, ~0U, FeatureX87 | FeatureCMPXCHG8B, '\0', false }, { {"pentium"}, CK_Pentium, ~0U, FeatureX87 | FeatureCMPXCHG8B, 'B', false }, @@ -386,8 +386,8 @@ constexpr ProcInfo Processors[] = { { {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B, '\0', false }, // K6 architecture processors. { {"k6"}, CK_K6, ~0U, FeaturesK6, '\0', false }, - { {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | Feature3DNOW, '\0', false }, - { {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | Feature3DNOW, '\0', false }, + { {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | FeaturePRFCHW, '\0', false }, + { {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | FeaturePRFCHW, '\0', false }, // K7 architecture processors. 
{ {"athlon"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false }, { {"athlon-tbird"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false }, @@ -493,6 +493,7 @@ constexpr FeatureBitset ImpliedFeaturesFXSR = {}; constexpr FeatureBitset ImpliedFeaturesINVPCID = {}; constexpr FeatureBitset ImpliedFeaturesLWP = {}; constexpr FeatureBitset ImpliedFeaturesLZCNT = {}; +constexpr FeatureBitset ImpliedFeaturesMMX = {}; constexpr FeatureBitset ImpliedFeaturesMWAITX = {}; constexpr FeatureBitset ImpliedFeaturesMOVBE = {}; constexpr FeatureBitset ImpliedFeaturesMOVDIR64B = {}; @@ -520,6 +521,8 @@ constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {}; constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {}; constexpr FeatureBitset ImpliedFeaturesX87 = {}; constexpr FeatureBitset ImpliedFeaturesXSAVE = {}; +constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {}; +constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {}; // Not really CPU features, but need to be in the table because clang uses // target features to communicate them to the backend. @@ -534,11 +537,6 @@ constexpr FeatureBitset ImpliedFeaturesXSAVEC = FeatureXSAVE; constexpr FeatureBitset ImpliedFeaturesXSAVEOPT = FeatureXSAVE; constexpr FeatureBitset ImpliedFeaturesXSAVES = FeatureXSAVE; -// MMX->3DNOW->3DNOWA chain. -constexpr FeatureBitset ImpliedFeaturesMMX = {}; -constexpr FeatureBitset ImpliedFeatures3DNOW = FeatureMMX; -constexpr FeatureBitset ImpliedFeatures3DNOWA = Feature3DNOW; - // SSE/AVX/AVX512F chain. 
constexpr FeatureBitset ImpliedFeaturesSSE = {}; constexpr FeatureBitset ImpliedFeaturesSSE2 = FeatureSSE; diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index 5aefcbf..2ec5da4 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/EHPersonalities.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -168,10 +169,24 @@ static bool mayExtractBlock(const BasicBlock &BB) { // // Resumes that are not reachable from a cleanup landing pad are considered to // be unreachable. It’s not safe to split them out either. + if (BB.hasAddressTaken() || BB.isEHPad()) return false; auto Term = BB.getTerminator(); - return !isa<InvokeInst>(Term) && !isa<ResumeInst>(Term); + if (isa<InvokeInst>(Term) || isa<ResumeInst>(Term)) + return false; + + // Do not outline basic blocks that have token type instructions. e.g., + // exception: + // %0 = cleanuppad within none [] + // call void @"?terminate@@YAXXZ"() [ "funclet"(token %0) ] + // br label %continue-exception + if (llvm::any_of( + BB, [](const Instruction &I) { return I.getType()->isTokenTy(); })) { + return false; + } + + return true; } /// Mark \p F cold. Based on this assumption, also optimize it for minimum size. @@ -258,6 +273,11 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const { F.hasFnAttribute(Attribute::SanitizeMemory)) return false; + // Do not outline scoped EH personality functions. 
+ if (F.hasPersonalityFn()) + if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + return false; + return true; } diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index f994f8a..09c56eb 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -945,6 +945,9 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front()); auto *Bias = getOrCreateBiasVar(getInstrProfCounterBiasVarName()); BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profc_bias"); + // Bias doesn't change after startup. + BiasLI->setMetadata(LLVMContext::MD_invariant_load, + MDNode::get(M.getContext(), std::nullopt)); } auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI); return Builder.CreateIntToPtr(Add, Addr->getType()); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ff60bd8..6d28b8f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -441,37 +441,6 @@ static std::optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE, return std::nullopt; } -/// Return a vector containing interleaved elements from multiple -/// smaller input vectors. -static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals, - const Twine &Name) { - unsigned Factor = Vals.size(); - assert(Factor > 1 && "Tried to interleave invalid number of vectors"); - - VectorType *VecTy = cast<VectorType>(Vals[0]->getType()); -#ifndef NDEBUG - for (Value *Val : Vals) - assert(Val->getType() == VecTy && "Tried to interleave mismatched types"); -#endif - - // Scalable vectors cannot use arbitrary shufflevectors (only splats), so - // must use intrinsics to interleave. 
- if (VecTy->isScalableTy()) { - VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy); - return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2, - Vals, - /*FMFSource=*/nullptr, Name); - } - - // Fixed length. Start by concatenating all vectors into a wide vector. - Value *WideVec = concatenateVectors(Builder, Vals); - - // Interleave the elements into the wide vector. - const unsigned NumElts = VecTy->getElementCount().getFixedValue(); - return Builder.CreateShuffleVector( - WideVec, createInterleaveMask(NumElts, Factor), Name); -} - namespace { // Forward declare GeneratedRTChecks. class GeneratedRTChecks; @@ -553,16 +522,6 @@ public: const VPIteration &Instance, VPTransformState &State); - /// Try to vectorize interleaved access group \p Group with the base address - /// given in \p Addr, optionally masking the vector operations if \p - /// BlockInMask is non-null. Use \p State to translate given VPValues to IR - /// values in the vectorized loop. - void vectorizeInterleaveGroup(const InterleaveGroup<Instruction> *Group, - ArrayRef<VPValue *> VPDefs, - VPTransformState &State, VPValue *Addr, - ArrayRef<VPValue *> StoredValues, - VPValue *BlockInMask, bool NeedsMaskForGaps); - /// Fix the non-induction PHIs in \p Plan. void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State); @@ -611,11 +570,6 @@ protected: /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock); - /// Returns a bitcasted value to the requested vector type. - /// Also handles bitcasts of vector<float> <-> vector<pointer> types. - Value *createBitOrPointerCast(Value *V, VectorType *DstVTy, - const DataLayout &DL); - /// Emit a bypass check to see if the vector trip count is zero, including if /// it overflows. 
void emitIterationCountCheck(BasicBlock *Bypass); @@ -2393,275 +2347,6 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) { return TTI.enableMaskedInterleavedAccessVectorization(); } -// Try to vectorize the interleave group that \p Instr belongs to. -// -// E.g. Translate following interleaved load group (factor = 3): -// for (i = 0; i < N; i+=3) { -// R = Pic[i]; // Member of index 0 -// G = Pic[i+1]; // Member of index 1 -// B = Pic[i+2]; // Member of index 2 -// ... // do something to R, G, B -// } -// To: -// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B -// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements -// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements -// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements -// -// Or translate following interleaved store group (factor = 3): -// for (i = 0; i < N; i+=3) { -// ... do something to R, G, B -// Pic[i] = R; // Member of index 0 -// Pic[i+1] = G; // Member of index 1 -// Pic[i+2] = B; // Member of index 2 -// } -// To: -// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7> -// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u> -// %interleaved.vec = shuffle %R_G.vec, %B_U.vec, -// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements -// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B -void InnerLoopVectorizer::vectorizeInterleaveGroup( - const InterleaveGroup<Instruction> *Group, ArrayRef<VPValue *> VPDefs, - VPTransformState &State, VPValue *Addr, ArrayRef<VPValue *> StoredValues, - VPValue *BlockInMask, bool NeedsMaskForGaps) { - Instruction *Instr = Group->getInsertPos(); - const DataLayout &DL = Instr->getDataLayout(); - - // Prepare for the vector type of the interleaved load/store. - Type *ScalarTy = getLoadStoreType(Instr); - unsigned InterleaveFactor = Group->getFactor(); - auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor); - - // Prepare for the new pointers. 
- SmallVector<Value *, 2> AddrParts; - unsigned Index = Group->getIndex(Instr); - - // TODO: extend the masked interleaved-group support to reversed access. - assert((!BlockInMask || !Group->isReverse()) && - "Reversed masked interleave-group not supported."); - - Value *Idx; - // If the group is reverse, adjust the index to refer to the last vector lane - // instead of the first. We adjust the index from the first vector lane, - // rather than directly getting the pointer for lane VF - 1, because the - // pointer operand of the interleaved access is supposed to be uniform. For - // uniform instructions, we're only required to generate a value for the - // first vector lane in each unroll iteration. - if (Group->isReverse()) { - Value *RuntimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF); - Idx = Builder.CreateSub(RuntimeVF, Builder.getInt32(1)); - Idx = Builder.CreateMul(Idx, Builder.getInt32(Group->getFactor())); - Idx = Builder.CreateAdd(Idx, Builder.getInt32(Index)); - Idx = Builder.CreateNeg(Idx); - } else - Idx = Builder.getInt32(-Index); - - for (unsigned Part = 0; Part < State.UF; Part++) { - Value *AddrPart = State.get(Addr, VPIteration(Part, 0)); - if (auto *I = dyn_cast<Instruction>(AddrPart)) - State.setDebugLocFrom(I->getDebugLoc()); - - // Notice current instruction could be any index. Need to adjust the address - // to the member of index 0. - // - // E.g. a = A[i+1]; // Member of index 1 (Current instruction) - // b = A[i]; // Member of index 0 - // Current pointer is pointed to A[i+1], adjust it to A[i]. - // - // E.g. A[i+1] = a; // Member of index 1 - // A[i] = b; // Member of index 0 - // A[i+2] = c; // Member of index 2 (Current instruction) - // Current pointer is pointed to A[i+2], adjust it to A[i]. 
- - bool InBounds = false; - if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts())) - InBounds = gep->isInBounds(); - AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds); - AddrParts.push_back(AddrPart); - } - - State.setDebugLocFrom(Instr->getDebugLoc()); - Value *PoisonVec = PoisonValue::get(VecTy); - - auto CreateGroupMask = [this, &BlockInMask, &State, &InterleaveFactor]( - unsigned Part, Value *MaskForGaps) -> Value * { - if (State.VF.isScalable()) { - assert(!MaskForGaps && "Interleaved groups with gaps are not supported."); - assert(InterleaveFactor == 2 && - "Unsupported deinterleave factor for scalable vectors"); - auto *BlockInMaskPart = State.get(BlockInMask, Part); - SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart}; - auto *MaskTy = VectorType::get(Builder.getInt1Ty(), - State.VF.getKnownMinValue() * 2, true); - return Builder.CreateIntrinsic(MaskTy, Intrinsic::vector_interleave2, Ops, - /*FMFSource=*/nullptr, "interleaved.mask"); - } - - if (!BlockInMask) - return MaskForGaps; - - Value *BlockInMaskPart = State.get(BlockInMask, Part); - Value *ShuffledMask = Builder.CreateShuffleVector( - BlockInMaskPart, - createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()), - "interleaved.mask"); - return MaskForGaps ? Builder.CreateBinOp(Instruction::And, ShuffledMask, - MaskForGaps) - : ShuffledMask; - }; - - // Vectorize the interleaved load group. - if (isa<LoadInst>(Instr)) { - Value *MaskForGaps = nullptr; - if (NeedsMaskForGaps) { - MaskForGaps = - createBitMaskForGaps(Builder, State.VF.getKnownMinValue(), *Group); - assert(MaskForGaps && "Mask for Gaps is required but it is null"); - } - - // For each unroll part, create a wide load for the group. 
- SmallVector<Value *, 2> NewLoads; - for (unsigned Part = 0; Part < State.UF; Part++) { - Instruction *NewLoad; - if (BlockInMask || MaskForGaps) { - assert(useMaskedInterleavedAccesses(*TTI) && - "masked interleaved groups are not allowed."); - Value *GroupMask = CreateGroupMask(Part, MaskForGaps); - NewLoad = - Builder.CreateMaskedLoad(VecTy, AddrParts[Part], Group->getAlign(), - GroupMask, PoisonVec, "wide.masked.vec"); - } - else - NewLoad = Builder.CreateAlignedLoad(VecTy, AddrParts[Part], - Group->getAlign(), "wide.vec"); - Group->addMetadata(NewLoad); - NewLoads.push_back(NewLoad); - } - - if (VecTy->isScalableTy()) { - assert(InterleaveFactor == 2 && - "Unsupported deinterleave factor for scalable vectors"); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - // Scalable vectors cannot use arbitrary shufflevectors (only splats), - // so must use intrinsics to deinterleave. - Value *DI = Builder.CreateIntrinsic( - Intrinsic::vector_deinterleave2, VecTy, NewLoads[Part], - /*FMFSource=*/nullptr, "strided.vec"); - unsigned J = 0; - for (unsigned I = 0; I < InterleaveFactor; ++I) { - Instruction *Member = Group->getMember(I); - - if (!Member) - continue; - - Value *StridedVec = Builder.CreateExtractValue(DI, I); - // If this member has different type, cast the result type. - if (Member->getType() != ScalarTy) { - VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF); - StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL); - } - - if (Group->isReverse()) - StridedVec = Builder.CreateVectorReverse(StridedVec, "reverse"); - - State.set(VPDefs[J], StridedVec, Part); - ++J; - } - } - - return; - } - - // For each member in the group, shuffle out the appropriate data from the - // wide loads. - unsigned J = 0; - for (unsigned I = 0; I < InterleaveFactor; ++I) { - Instruction *Member = Group->getMember(I); - - // Skip the gaps in the group. 
- if (!Member) - continue; - - auto StrideMask = - createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue()); - for (unsigned Part = 0; Part < State.UF; Part++) { - Value *StridedVec = Builder.CreateShuffleVector( - NewLoads[Part], StrideMask, "strided.vec"); - - // If this member has different type, cast the result type. - if (Member->getType() != ScalarTy) { - assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); - VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF); - StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL); - } - - if (Group->isReverse()) - StridedVec = Builder.CreateVectorReverse(StridedVec, "reverse"); - - State.set(VPDefs[J], StridedVec, Part); - } - ++J; - } - return; - } - - // The sub vector type for current instruction. - auto *SubVT = VectorType::get(ScalarTy, State.VF); - - // Vectorize the interleaved store group. - Value *MaskForGaps = - createBitMaskForGaps(Builder, State.VF.getKnownMinValue(), *Group); - assert((!MaskForGaps || useMaskedInterleavedAccesses(*TTI)) && - "masked interleaved groups are not allowed."); - assert((!MaskForGaps || !State.VF.isScalable()) && - "masking gaps for scalable vectors is not yet supported."); - for (unsigned Part = 0; Part < State.UF; Part++) { - // Collect the stored vector from each member. - SmallVector<Value *, 4> StoredVecs; - unsigned StoredIdx = 0; - for (unsigned i = 0; i < InterleaveFactor; i++) { - assert((Group->getMember(i) || MaskForGaps) && - "Fail to get a member from an interleaved store group"); - Instruction *Member = Group->getMember(i); - - // Skip the gaps in the group. - if (!Member) { - Value *Undef = PoisonValue::get(SubVT); - StoredVecs.push_back(Undef); - continue; - } - - Value *StoredVec = State.get(StoredValues[StoredIdx], Part); - ++StoredIdx; - - if (Group->isReverse()) - StoredVec = Builder.CreateVectorReverse(StoredVec, "reverse"); - - // If this member has different type, cast it to a unified type. 
- - if (StoredVec->getType() != SubVT) - StoredVec = createBitOrPointerCast(StoredVec, SubVT, DL); - - StoredVecs.push_back(StoredVec); - } - - // Interleave all the smaller vectors into one wider vector. - Value *IVec = interleaveVectors(Builder, StoredVecs, "interleaved.vec"); - Instruction *NewStoreInstr; - if (BlockInMask || MaskForGaps) { - Value *GroupMask = CreateGroupMask(Part, MaskForGaps); - NewStoreInstr = Builder.CreateMaskedStore(IVec, AddrParts[Part], - Group->getAlign(), GroupMask); - } else - NewStoreInstr = - Builder.CreateAlignedStore(IVec, AddrParts[Part], Group->getAlign()); - - Group->addMetadata(NewStoreInstr); - } -} - void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, VPReplicateRecipe *RepRecipe, const VPIteration &Instance, @@ -2769,36 +2454,6 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { return VectorTripCount; } -Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy, - const DataLayout &DL) { - // Verify that V is a vector type with same number of elements as DstVTy. - auto VF = DstVTy->getElementCount(); - auto *SrcVecTy = cast<VectorType>(V->getType()); - assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match"); - Type *SrcElemTy = SrcVecTy->getElementType(); - Type *DstElemTy = DstVTy->getElementType(); - assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) && - "Vector elements must have same size"); - - // Do a direct cast if element types are castable. - if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) { - return Builder.CreateBitOrPointerCast(V, DstVTy); - } - // V cannot be directly casted to desired vector type. - // May happen when V is a floating point vector but DstVTy is a vector of - // pointers or vice-versa. Handle this using a two-step bitcast using an - // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float. 
- assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) && - "Only one type should be a pointer type"); - assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) && - "Only one type should be a floating point type"); - Type *IntTy = - IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy)); - auto *VecIntTy = VectorType::get(IntTy, VF); - Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy); - return Builder.CreateBitOrPointerCast(CastVal, DstVTy); -} - void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { Value *Count = getTripCount(); // Reuse existing vector loop preheader for TC checks. @@ -4594,15 +4249,12 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget( // Select the largest VF which doesn't require more registers than existing // ones. - for (int i = RUs.size() - 1; i >= 0; --i) { - bool Selected = true; - for (auto &pair : RUs[i].MaxLocalUsers) { - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); - if (pair.second > TargetNumRegisters) - Selected = false; - } - if (Selected) { - MaxVF = VFs[i]; + for (int I = RUs.size() - 1; I >= 0; --I) { + const auto &MLU = RUs[I].MaxLocalUsers; + if (all_of(MLU, [&](decltype(MLU.front()) &LU) { + return LU.second <= TTI.getNumberOfRegisters(LU.first); + })) { + MaxVF = VFs[I]; break; } } @@ -8986,6 +8638,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { bool NeedsMaskForGaps = IG->requiresScalarEpilogue() && !CM.isScalarEpilogueAllowed(); + assert((!NeedsMaskForGaps || useMaskedInterleavedAccesses(CM.TTI)) && + "masked interleaved groups are not allowed."); auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues, Recipe->getMask(), NeedsMaskForGaps); VPIG->insertBefore(Recipe); @@ -9397,37 +9051,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( VPlanTransforms::clearReductionWrapFlags(*Plan); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void 
VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at "; - IG->getInsertPos()->printAsOperand(O, false); - O << ", "; - getAddr()->printAsOperand(O, SlotTracker); - VPValue *Mask = getMask(); - if (Mask) { - O << ", "; - Mask->printAsOperand(O, SlotTracker); - } - - unsigned OpIdx = 0; - for (unsigned i = 0; i < IG->getFactor(); ++i) { - if (!IG->getMember(i)) - continue; - if (getNumStoreOperands() > 0) { - O << "\n" << Indent << " store "; - getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker); - O << " to index " << i; - } else { - O << "\n" << Indent << " "; - getVPValue(OpIdx)->printAsOperand(O, SlotTracker); - O << " = load from index " << i; - } - ++OpIdx; - } -} -#endif - void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction && "Not a pointer induction according to InductionDescriptor!"); @@ -9511,13 +9134,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { State.set(this, DerivedIV, VPIteration(0, 0)); } -void VPInterleaveRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Interleave group being replicated."); - State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(), - getStoredValues(), getMask(), - NeedsMaskForGaps); -} - void VPReplicateRecipe::execute(VPTransformState &State) { Instruction *UI = getUnderlyingInstr(); if (State.Instance) { // Generate a single instance. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 4b1ac79..1b787d0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2037,6 +2037,373 @@ void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V, + VectorType *DstVTy, const DataLayout &DL) { + // Verify that V is a vector type with same number of elements as DstVTy. + auto VF = DstVTy->getElementCount(); + auto *SrcVecTy = cast<VectorType>(V->getType()); + assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match"); + Type *SrcElemTy = SrcVecTy->getElementType(); + Type *DstElemTy = DstVTy->getElementType(); + assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) && + "Vector elements must have same size"); + + // Do a direct cast if element types are castable. + if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) { + return Builder.CreateBitOrPointerCast(V, DstVTy); + } + // V cannot be directly casted to desired vector type. + // May happen when V is a floating point vector but DstVTy is a vector of + // pointers or vice-versa. Handle this using a two-step bitcast using an + // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float. 
+ assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) && + "Only one type should be a pointer type"); + assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) && + "Only one type should be a floating point type"); + Type *IntTy = + IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy)); + auto *VecIntTy = VectorType::get(IntTy, VF); + Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy); + return Builder.CreateBitOrPointerCast(CastVal, DstVTy); +} + +/// Return a vector containing interleaved elements from multiple +/// smaller input vectors. +static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals, + const Twine &Name) { + unsigned Factor = Vals.size(); + assert(Factor > 1 && "Tried to interleave invalid number of vectors"); + + VectorType *VecTy = cast<VectorType>(Vals[0]->getType()); +#ifndef NDEBUG + for (Value *Val : Vals) + assert(Val->getType() == VecTy && "Tried to interleave mismatched types"); +#endif + + // Scalable vectors cannot use arbitrary shufflevectors (only splats), so + // must use intrinsics to interleave. + if (VecTy->isScalableTy()) { + VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy); + return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2, + Vals, + /*FMFSource=*/nullptr, Name); + } + + // Fixed length. Start by concatenating all vectors into a wide vector. + Value *WideVec = concatenateVectors(Builder, Vals); + + // Interleave the elements into the wide vector. + const unsigned NumElts = VecTy->getElementCount().getFixedValue(); + return Builder.CreateShuffleVector( + WideVec, createInterleaveMask(NumElts, Factor), Name); +} + +// Try to vectorize the interleave group that \p Instr belongs to. +// +// E.g. Translate following interleaved load group (factor = 3): +// for (i = 0; i < N; i+=3) { +// R = Pic[i]; // Member of index 0 +// G = Pic[i+1]; // Member of index 1 +// B = Pic[i+2]; // Member of index 2 +// ... 
// do something to R, G, B +// } +// To: +// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B +// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements +// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements +// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements +// +// Or translate following interleaved store group (factor = 3): +// for (i = 0; i < N; i+=3) { +// ... do something to R, G, B +// Pic[i] = R; // Member of index 0 +// Pic[i+1] = G; // Member of index 1 +// Pic[i+2] = B; // Member of index 2 +// } +// To: +// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7> +// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u> +// %interleaved.vec = shuffle %R_G.vec, %B_U.vec, +// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements +// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B +void VPInterleaveRecipe::execute(VPTransformState &State) { + assert(!State.Instance && "Interleave group being replicated."); + const InterleaveGroup<Instruction> *Group = IG; + Instruction *Instr = Group->getInsertPos(); + + // Prepare for the vector type of the interleaved load/store. + Type *ScalarTy = getLoadStoreType(Instr); + unsigned InterleaveFactor = Group->getFactor(); + auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor); + + // Prepare for the new pointers. + SmallVector<Value *, 2> AddrParts; + unsigned Index = Group->getIndex(Instr); + + // TODO: extend the masked interleaved-group support to reversed access. + VPValue *BlockInMask = getMask(); + assert((!BlockInMask || !Group->isReverse()) && + "Reversed masked interleave-group not supported."); + + Value *Idx; + // If the group is reverse, adjust the index to refer to the last vector lane + // instead of the first. We adjust the index from the first vector lane, + // rather than directly getting the pointer for lane VF - 1, because the + // pointer operand of the interleaved access is supposed to be uniform. 
For + // uniform instructions, we're only required to generate a value for the + // first vector lane in each unroll iteration. + if (Group->isReverse()) { + Value *RuntimeVF = + getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF); + Idx = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1)); + Idx = State.Builder.CreateMul(Idx, + State.Builder.getInt32(Group->getFactor())); + Idx = State.Builder.CreateAdd(Idx, State.Builder.getInt32(Index)); + Idx = State.Builder.CreateNeg(Idx); + } else + Idx = State.Builder.getInt32(-Index); + + VPValue *Addr = getAddr(); + for (unsigned Part = 0; Part < State.UF; Part++) { + Value *AddrPart = State.get(Addr, VPIteration(Part, 0)); + if (auto *I = dyn_cast<Instruction>(AddrPart)) + State.setDebugLocFrom(I->getDebugLoc()); + + // Notice current instruction could be any index. Need to adjust the address + // to the member of index 0. + // + // E.g. a = A[i+1]; // Member of index 1 (Current instruction) + // b = A[i]; // Member of index 0 + // Current pointer is pointed to A[i+1], adjust it to A[i]. + // + // E.g. A[i+1] = a; // Member of index 1 + // A[i] = b; // Member of index 0 + // A[i+2] = c; // Member of index 2 (Current instruction) + // Current pointer is pointed to A[i+2], adjust it to A[i]. 
+ + bool InBounds = false; + if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts())) + InBounds = gep->isInBounds(); + AddrPart = State.Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds); + AddrParts.push_back(AddrPart); + } + + State.setDebugLocFrom(Instr->getDebugLoc()); + Value *PoisonVec = PoisonValue::get(VecTy); + + auto CreateGroupMask = [&BlockInMask, &State, &InterleaveFactor]( + unsigned Part, Value *MaskForGaps) -> Value * { + if (State.VF.isScalable()) { + assert(!MaskForGaps && "Interleaved groups with gaps are not supported."); + assert(InterleaveFactor == 2 && + "Unsupported deinterleave factor for scalable vectors"); + auto *BlockInMaskPart = State.get(BlockInMask, Part); + SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart}; + auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(), + State.VF.getKnownMinValue() * 2, true); + return State.Builder.CreateIntrinsic( + MaskTy, Intrinsic::vector_interleave2, Ops, + /*FMFSource=*/nullptr, "interleaved.mask"); + } + + if (!BlockInMask) + return MaskForGaps; + + Value *BlockInMaskPart = State.get(BlockInMask, Part); + Value *ShuffledMask = State.Builder.CreateShuffleVector( + BlockInMaskPart, + createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()), + "interleaved.mask"); + return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And, + ShuffledMask, MaskForGaps) + : ShuffledMask; + }; + + const DataLayout &DL = Instr->getDataLayout(); + // Vectorize the interleaved load group. + if (isa<LoadInst>(Instr)) { + Value *MaskForGaps = nullptr; + if (NeedsMaskForGaps) { + MaskForGaps = createBitMaskForGaps(State.Builder, + State.VF.getKnownMinValue(), *Group); + assert(MaskForGaps && "Mask for Gaps is required but it is null"); + } + + // For each unroll part, create a wide load for the group. 
+ SmallVector<Value *, 2> NewLoads; + for (unsigned Part = 0; Part < State.UF; Part++) { + Instruction *NewLoad; + if (BlockInMask || MaskForGaps) { + Value *GroupMask = CreateGroupMask(Part, MaskForGaps); + NewLoad = State.Builder.CreateMaskedLoad(VecTy, AddrParts[Part], + Group->getAlign(), GroupMask, + PoisonVec, "wide.masked.vec"); + } else + NewLoad = State.Builder.CreateAlignedLoad( + VecTy, AddrParts[Part], Group->getAlign(), "wide.vec"); + Group->addMetadata(NewLoad); + NewLoads.push_back(NewLoad); + } + + ArrayRef<VPValue *> VPDefs = definedValues(); + const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); + if (VecTy->isScalableTy()) { + assert(InterleaveFactor == 2 && + "Unsupported deinterleave factor for scalable vectors"); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + // Scalable vectors cannot use arbitrary shufflevectors (only splats), + // so must use intrinsics to deinterleave. + Value *DI = State.Builder.CreateIntrinsic( + Intrinsic::vector_deinterleave2, VecTy, NewLoads[Part], + /*FMFSource=*/nullptr, "strided.vec"); + unsigned J = 0; + for (unsigned I = 0; I < InterleaveFactor; ++I) { + Instruction *Member = Group->getMember(I); + + if (!Member) + continue; + + Value *StridedVec = State.Builder.CreateExtractValue(DI, I); + // If this member has different type, cast the result type. + if (Member->getType() != ScalarTy) { + VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF); + StridedVec = + createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL); + } + + if (Group->isReverse()) + StridedVec = + State.Builder.CreateVectorReverse(StridedVec, "reverse"); + + State.set(VPDefs[J], StridedVec, Part); + ++J; + } + } + + return; + } + + // For each member in the group, shuffle out the appropriate data from the + // wide loads. + unsigned J = 0; + for (unsigned I = 0; I < InterleaveFactor; ++I) { + Instruction *Member = Group->getMember(I); + + // Skip the gaps in the group. 
+ if (!Member) + continue; + + auto StrideMask = + createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue()); + for (unsigned Part = 0; Part < State.UF; Part++) { + Value *StridedVec = State.Builder.CreateShuffleVector( + NewLoads[Part], StrideMask, "strided.vec"); + + // If this member has different type, cast the result type. + if (Member->getType() != ScalarTy) { + assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); + VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF); + StridedVec = + createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL); + } + + if (Group->isReverse()) + StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse"); + + State.set(VPDefs[J], StridedVec, Part); + } + ++J; + } + return; + } + + // The sub vector type for current instruction. + auto *SubVT = VectorType::get(ScalarTy, State.VF); + + // Vectorize the interleaved store group. + Value *MaskForGaps = + createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group); + assert((!MaskForGaps || !State.VF.isScalable()) && + "masking gaps for scalable vectors is not yet supported."); + ArrayRef<VPValue *> StoredValues = getStoredValues(); + for (unsigned Part = 0; Part < State.UF; Part++) { + // Collect the stored vector from each member. + SmallVector<Value *, 4> StoredVecs; + unsigned StoredIdx = 0; + for (unsigned i = 0; i < InterleaveFactor; i++) { + assert((Group->getMember(i) || MaskForGaps) && + "Fail to get a member from an interleaved store group"); + Instruction *Member = Group->getMember(i); + + // Skip the gaps in the group. + if (!Member) { + Value *Undef = PoisonValue::get(SubVT); + StoredVecs.push_back(Undef); + continue; + } + + Value *StoredVec = State.get(StoredValues[StoredIdx], Part); + ++StoredIdx; + + if (Group->isReverse()) + StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse"); + + // If this member has different type, cast it to a unified type. 
+ + if (StoredVec->getType() != SubVT) + StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL); + + StoredVecs.push_back(StoredVec); + } + + // Interleave all the smaller vectors into one wider vector. + Value *IVec = + interleaveVectors(State.Builder, StoredVecs, "interleaved.vec"); + Instruction *NewStoreInstr; + if (BlockInMask || MaskForGaps) { + Value *GroupMask = CreateGroupMask(Part, MaskForGaps); + NewStoreInstr = State.Builder.CreateMaskedStore( + IVec, AddrParts[Part], Group->getAlign(), GroupMask); + } else + NewStoreInstr = State.Builder.CreateAlignedStore(IVec, AddrParts[Part], + Group->getAlign()); + + Group->addMetadata(NewStoreInstr); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at "; + IG->getInsertPos()->printAsOperand(O, false); + O << ", "; + getAddr()->printAsOperand(O, SlotTracker); + VPValue *Mask = getMask(); + if (Mask) { + O << ", "; + Mask->printAsOperand(O, SlotTracker); + } + + unsigned OpIdx = 0; + for (unsigned i = 0; i < IG->getFactor(); ++i) { + if (!IG->getMember(i)) + continue; + if (getNumStoreOperands() > 0) { + O << "\n" << Indent << " store "; + getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker); + O << " to index " << i; + } else { + O << "\n" << Indent << " "; + getVPValue(OpIdx)->printAsOperand(O, SlotTracker); + O << " = load from index " << i; + } + ++OpIdx; + } +} +#endif + void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { Value *Start = getStartValue()->getLiveInIRValue(); PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d668ae2..c91fd0f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ 
-278,6 +278,11 @@ static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { return UI && UI->getParent() == Then2; }); + // Remove phi recipes that are unused after merging the regions. + if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) { + Phi1ToMove.eraseFromParent(); + continue; + } Phi1ToMove.moveBefore(*Merge2, Merge2->begin()); } diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll new file mode 100644 index 0000000..4889871 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +%va_list = type { ptr, ptr, ptr, i32, i32 } + +define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp { +; CHECK-LABEL: callee: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #192 +; CHECK-NEXT: mov x8, #-24 // =0xffffffffffffffe8 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: add x10, sp, #136 +; CHECK-NEXT: movk x8, #65408, lsl #32 +; CHECK-NEXT: add x9, x9, #128 +; CHECK-NEXT: stp x6, x7, [sp, #144] +; CHECK-NEXT: stp x9, x8, [sp, #176] +; CHECK-NEXT: add x9, x10, #24 +; CHECK-NEXT: add x10, sp, #192 +; CHECK-NEXT: mov w8, #-24 // =0xffffffe8 +; CHECK-NEXT: str x5, [sp, #136] +; CHECK-NEXT: stp q0, q1, [sp] +; CHECK-NEXT: stp q2, q3, [sp, #32] +; CHECK-NEXT: stp q4, q5, [sp, #64] +; CHECK-NEXT: stp q6, q7, [sp, #96] +; CHECK-NEXT: stp x10, x9, [sp, #160] +; CHECK-NEXT: tbz w8, #31, .LBB0_3 +; CHECK-NEXT: // %bb.1: // %maybe_reg +; CHECK-NEXT: add w9, w8, #8 +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: str w9, [sp, #184] +; CHECK-NEXT: b.gt .LBB0_3 +; CHECK-NEXT: // %bb.2: // %in_reg +; CHECK-NEXT: ldr x9, [sp, #168] +; CHECK-NEXT: add x8, x9, w8, sxtw +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_3: // %on_stack +; CHECK-NEXT: ldr x8, [sp, #160] +; 
CHECK-NEXT: add x9, x8, #8 +; CHECK-NEXT: str x9, [sp, #160] +; CHECK-NEXT: .LBB0_4: // %end +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: add sp, sp, #192 +; CHECK-NEXT: ret +entry: + %args = alloca %va_list, align 8 + call void @llvm.va_start(ptr %args) + %gr_offs_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 3 + %gr_offs = load i32, ptr %gr_offs_p, align 8 + %0 = icmp sge i32 %gr_offs, 0 + br i1 %0, label %on_stack, label %maybe_reg + +maybe_reg: + %new_reg_offs = add i32 %gr_offs, 8 + store i32 %new_reg_offs, ptr %gr_offs_p, align 8 + %inreg = icmp sle i32 %new_reg_offs, 0 + br i1 %inreg, label %in_reg, label %on_stack + +in_reg: + %reg_top_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 1 + %reg_top = load ptr, ptr %reg_top_p, align 8 + %reg = getelementptr inbounds i8, ptr %reg_top, i32 %gr_offs + br label %end + +on_stack: + %stack_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 0 + %stack = load ptr, ptr %stack_p, align 8 + %new_stack = getelementptr inbounds i8, ptr %stack, i64 8 + store ptr %new_stack, ptr %stack_p, align 8 + br label %end + +end: + %p = phi ptr [ %reg, %in_reg ], [ %stack, %on_stack ] + %10 = load i32, ptr %p, align 8 + call void @llvm.va_end.p0(ptr %args) + ret i32 %10 +} + +declare void @llvm.va_start(ptr) nounwind +declare void @llvm.va_end(ptr) nounwind + +define i32 @caller() nounwind ssp { +; CHECK-LABEL: caller: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #176 +; CHECK-NEXT: mov w8, #10 // =0xa +; CHECK-NEXT: mov w9, #9 // =0x9 +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: mov w1, #2 // =0x2 +; CHECK-NEXT: mov w2, #3 // =0x3 +; CHECK-NEXT: mov w3, #4 // =0x4 +; CHECK-NEXT: mov w4, #5 // =0x5 +; CHECK-NEXT: mov w5, #6 // =0x6 +; CHECK-NEXT: mov w6, #7 // =0x7 +; CHECK-NEXT: mov w7, #8 // =0x8 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: 
stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: str w8, [sp, #8] +; CHECK-NEXT: str w9, [sp] +; CHECK-NEXT: bl callee +; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #176 +; CHECK-NEXT: ret + %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10) + ret i32 %r +} + diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll new file mode 100644 index 0000000..e227f14 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s + +define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) 
nounwind noinline ssp { +; CHECK-LABEL: callee: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: ldr w0, [sp, #16] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %args = alloca ptr, align 8 + call void @llvm.va_start(ptr %args) + %10 = va_arg ptr %args, i32 + call void @llvm.va_end(ptr %args) + ret i32 %10 +} + +declare void @llvm.va_start(ptr) nounwind +declare void @llvm.va_end(ptr) nounwind + +define i32 @caller() nounwind ssp { +; CHECK-LABEL: caller: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #208 +; CHECK-NEXT: mov w8, #10 ; =0xa +; CHECK-NEXT: mov w9, #9 ; =0x9 +; CHECK-NEXT: mov w0, #1 ; =0x1 +; CHECK-NEXT: stp x9, x8, [sp, #24] +; CHECK-NEXT: mov w8, #8 ; =0x8 +; CHECK-NEXT: mov w9, #6 ; =0x6 +; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: mov w8, #7 ; =0x7 +; CHECK-NEXT: mov w1, #2 ; =0x2 +; CHECK-NEXT: mov w2, #3 ; =0x3 +; CHECK-NEXT: mov w3, #4 ; =0x4 +; CHECK-NEXT: mov w4, #5 ; =0x5 +; CHECK-NEXT: stp d15, d14, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #80] ; 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #96] ; 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #112] ; 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #128] ; 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #144] ; 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill +; CHECK-NEXT: stp x9, x8, [sp] +; CHECK-NEXT: bl _callee +; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #160] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #144] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, 
#128] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp, #112] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #96] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #80] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #64] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #208 +; CHECK-NEXT: ret + %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10) + ret i32 %r +} + diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll new file mode 100644 index 0000000..83dd240 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-pc-windows < %s | FileCheck %s + +define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) 
nounwind noinline ssp { +; CHECK-LABEL: callee: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: mov x0, x5 +; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: stp x6, x7, [sp, #32] +; CHECK-NEXT: str x5, [sp, #24] +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret + %args = alloca ptr, align 8 + call void @llvm.va_start(ptr %args) + %p = load ptr, ptr %args, align 8 + %10 = load i32, ptr %p, align 8 + call void @llvm.va_end(ptr %args) + ret i32 %10 +} + +declare void @llvm.va_start(ptr) nounwind +declare void @llvm.va_end(ptr) nounwind + +define i32 @caller() nounwind ssp { +; CHECK-LABEL: caller: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #176 +; CHECK-NEXT: mov w8, #10 // =0xa +; CHECK-NEXT: mov w9, #9 // =0x9 +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: mov w1, #2 // =0x2 +; CHECK-NEXT: mov w2, #3 // =0x3 +; CHECK-NEXT: mov w3, #4 // =0x4 +; CHECK-NEXT: mov w4, #5 // =0x5 +; CHECK-NEXT: mov w5, #6 // =0x6 +; CHECK-NEXT: mov w6, #7 // =0x7 +; CHECK-NEXT: mov w7, #8 // =0x8 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: str w8, [sp, #8] +; CHECK-NEXT: str w9, [sp] +; CHECK-NEXT: bl callee +; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload +; 
CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #176 +; CHECK-NEXT: ret + %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10) + ret i32 %r +} + diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll new file mode 100644 index 0000000..90dfab5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll @@ -0,0 +1,479 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s + +define amdgpu_ps void @sample_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, 
float %s) { +; GFX10PLUS-LABEL: sample_1d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_1d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +; GFX10PLUS-LABEL: sample_2d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_2d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.2d.nortn.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_3d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) { +; GFX10PLUS-LABEL: sample_3d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_3d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.3d.nortn.f32(i32 15, float 
%s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_cube_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { +; GFX10PLUS-LABEL: sample_cube_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_cube_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.cube.nortn.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_1darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) { +; GFX10PLUS-LABEL: sample_1darray_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_1darray_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_2darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { +; GFX10PLUS-LABEL: sample_2darray_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10PLUS-NEXT: 
s_endpgm +; +; GFX12-LABEL: sample_2darray_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_b_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +; GFX10PLUS-LABEL: sample_b_1d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_b_1d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_b_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +; GFX10PLUS-LABEL: sample_b_2d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample_b off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_b_2d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample_b off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void 
@sample_c_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +; GFX10PLUS-LABEL: sample_c_1d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample_c off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_c_1d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample_c off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_c_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +; GFX10PLUS-LABEL: sample_c_2d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: image_sample_c off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_c_2d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: image_sample_c off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_d_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +; GFX10PLUS-LABEL: sample_d_1d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: image_sample_d off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_d_1d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: image_sample_d off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; 
GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_d_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; GFX10PLUS-LABEL: sample_d_2d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: image_sample_d off, v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_d_2d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: image_sample_d off, [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_l_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { +; GFX10PLUS-LABEL: sample_l_1d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: image_sample_l off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_l_1d_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: image_sample_l off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @sample_l_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { +; GFX10PLUS-LABEL: sample_l_2d_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: image_sample_l off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_l_2d_nortn: +; GFX12: ; 
%bb.0: ; %main_body +; GFX12-NEXT: image_sample_l off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps <4 x float> @sample_nortn_mix_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; GFX10PLUS-LABEL: sample_nortn_mix_1: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) +; GFX10PLUS-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: sample_nortn_mix_1: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_mov_b32 s12, exec_lo +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: ; return to shader part epilog +main_body: + call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_nortn_mix_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; GFX10PLUS-LABEL: sample_nortn_mix_2: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo +; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0 +; GFX10PLUS-NEXT: s_and_b32 exec_lo, 
exec_lo, s12 +; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) +; GFX10PLUS-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: sample_nortn_mix_2: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_mov_b32 s12, exec_lo +; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-NEXT: v_mov_b32_e32 v4, v0 +; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3: +; GFX10PLUS-SDAG: ; %bb.0: ; %main_body +; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo +; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog +; +; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3: +; GFX10PLUS-GISEL: ; %bb.0: ; %main_body +; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo +; GFX10PLUS-GISEL-NEXT: s_wqm_b32 
exec_lo, exec_lo +; GFX10PLUS-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog +; +; GFX12-SDAG-LABEL: sample_nortn_mix_3: +; GFX12-SDAG: ; %bb.0: ; %main_body +; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo +; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1 +; GFX12-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: sample_nortn_mix_3: +; GFX12-GISEL: ; %bb.0: ; %main_body +; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo +; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1 +; GFX12-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v.0 = extractelement <4 x float> %v, i32 0 + call void 
@llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %u +} + +define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4: +; GFX10PLUS-SDAG: ; %bb.0: ; %main_body +; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo +; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog +; +; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4: +; GFX10PLUS-GISEL: ; %bb.0: ; %main_body +; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo +; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10PLUS-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: image_sample 
v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog +; +; GFX12-SDAG-LABEL: sample_nortn_mix_4: +; GFX12-SDAG: ; %bb.0: ; %main_body +; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo +; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1 +; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x2 +; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: sample_nortn_mix_4: +; GFX12-GISEL: ; %bb.0: ; %main_body +; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo +; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX12-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1 +; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] 
dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x2 +; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v.0 = extractelement <4 x float> %v, i32 0 + %v.1 = extractelement <4 x float> %v, i32 0 + %v.2 = extractelement <4 x float> %v, i32 0 + %v.3 = extractelement <4 x float> %v, i32 0 + call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %u.0 = extractelement <4 x float> %u, i32 0 + call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.3, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %u.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %u +} + +define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { +; GFX10PLUS-LABEL: sample_d_1d_g16_nortn: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: image_sample_d_g16 off, v[0:2], 
s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-LABEL: sample_d_1d_g16_nortn: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX12-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret void +} + +declare void @llvm.amdgcn.image.sample.1d.nortn.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 +declare void @llvm.amdgcn.image.sample.2d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 +declare void @llvm.amdgcn.image.sample.3d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 +declare void @llvm.amdgcn.image.sample.cube.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 +declare void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 +declare void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 + +declare void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 +declare void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 + +declare void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 +declare void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 + +declare void @llvm.amdgcn.image.sample.d.1d.f32.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 +declare void @llvm.amdgcn.image.sample.d.2d.f32.nortn.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 + +declare void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 
+declare void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 + +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX10: {{.*}} +; GFX10-GISEL: {{.*}} +; GFX10-SDAG: {{.*}} +; GFX11: {{.*}} +; GFX11-GISEL: {{.*}} +; GFX11-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/LoongArch/andn-icmp.ll b/llvm/test/CodeGen/LoongArch/andn-icmp.ll index 6d07e7a..447f3ac 100644 --- a/llvm/test/CodeGen/LoongArch/andn-icmp.ll +++ b/llvm/test/CodeGen/LoongArch/andn-icmp.ll @@ -6,12 +6,14 @@ define i1 @andn_icmp_eq_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_eq_i8: ; LA32: # %bb.0: ; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: andi $a0, $a0, 255 ; LA32-NEXT: sltui $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_eq_i8: ; LA64: # %bb.0: ; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: andi $a0, $a0, 255 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret %and = and i8 %a, %b @@ -23,12 +25,14 @@ define i1 @andn_icmp_eq_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_eq_i16: ; LA32: # %bb.0: ; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-NEXT: sltui $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_eq_i16: ; LA64: # %bb.0: ; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret %and = and i16 %a, %b @@ -76,12 +80,14 @@ define i1 @andn_icmp_ne_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ne_i8: ; LA32: # %bb.0: ; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: andi $a0, $a0, 255 ; LA32-NEXT: sltu $a0, $zero, $a0 ; 
LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ne_i8: ; LA64: # %bb.0: ; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: andi $a0, $a0, 255 ; LA64-NEXT: sltu $a0, $zero, $a0 ; LA64-NEXT: ret %and = and i8 %a, %b @@ -93,12 +99,14 @@ define i1 @andn_icmp_ne_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ne_i16: ; LA32: # %bb.0: ; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-NEXT: sltu $a0, $zero, $a0 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ne_i16: ; LA64: # %bb.0: ; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-NEXT: sltu $a0, $zero, $a0 ; LA64-NEXT: ret %and = and i16 %a, %b @@ -145,13 +153,15 @@ define i1 @andn_icmp_ne_i64(i64 %a, i64 %b) nounwind { define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ult_i8: ; LA32: # %bb.0: -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: and $a0, $a1, $a0 ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ult_i8: ; LA64: # %bb.0: -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: and $a0, $a1, $a0 ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: ret %and = and i8 %a, %b @@ -162,13 +172,15 @@ define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind { define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ult_i16: ; LA32: # %bb.0: -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: and $a0, $a1, $a0 ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ult_i16: ; LA64: # %bb.0: -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: and $a0, $a1, $a0 ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: ret %and = and i16 %a, %b @@ -179,14 +191,16 @@ define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind { define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: 
andn_icmp_uge_i8: ; LA32: # %bb.0: -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: and $a0, $a1, $a0 ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_uge_i8: ; LA64: # %bb.0: -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: and $a0, $a1, $a0 ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret @@ -198,14 +212,16 @@ define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind { define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_uge_i16: ; LA32: # %bb.0: -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: and $a0, $a1, $a0 ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_uge_i16: ; LA64: # %bb.0: -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: and $a0, $a1, $a0 ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret @@ -217,13 +233,15 @@ define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind { define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ugt_i8: ; LA32: # %bb.0: -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: and $a0, $a1, $a0 ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ugt_i8: ; LA64: # %bb.0: -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: and $a0, $a1, $a0 ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: ret %and = and i8 %a, %b @@ -234,13 +252,15 @@ define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind { define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ugt_i16: ; LA32: # %bb.0: -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: and $a0, $a1, $a0 ; LA32-NEXT: sltu $a0, $a1, $a0 ; 
LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ugt_i16: ; LA64: # %bb.0: -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: and $a0, $a1, $a0 ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: ret %and = and i16 %a, %b @@ -251,14 +271,16 @@ define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind { define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ule_i8: ; LA32: # %bb.0: -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: and $a0, $a1, $a0 ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ule_i8: ; LA64: # %bb.0: -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: and $a0, $a1, $a0 ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret @@ -270,14 +292,16 @@ define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind { define i1 @andn_icmp_ule_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ule_i16: ; LA32: # %bb.0: -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: and $a0, $a1, $a0 ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ule_i16: ; LA64: # %bb.0: -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: and $a0, $a1, $a0 ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret @@ -577,3 +601,25 @@ define i1 @andn_icmp_ult_i8_nn(i8 %a, i8 %b) nounwind { %cmp = icmp ult i8 %and, %b ret i1 %cmp } + +define i1 @andn_icmp_eq_i8_i32(i8 signext %a, i8 signext %b) nounwind { +; LA32-LABEL: andn_icmp_eq_i8_i32: +; LA32: # %bb.0: +; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: sltui $a0, $a0, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: andn_icmp_eq_i8_i32: +; LA64: # %bb.0: +; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: sltui 
$a0, $a0, 1 +; LA64-NEXT: ret + %x = zext i8 %a to i32 + %y = zext i8 %b to i32 + %not = xor i32 %x, -1 + %and = and i32 %not, %y + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/NVPTX/addr-mode.ll b/llvm/test/CodeGen/NVPTX/addr-mode.ll new file mode 100644 index 0000000..a6a085c --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/addr-mode.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march=nvptx64 | FileCheck %s + +target triple = "nvptx64-nvidia-cuda" + +define i32 @test_addr_mode_i64(ptr %x) { +; CHECK-LABEL: test_addr_mode_i64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1+-4]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + %addr = getelementptr i32, ptr %x, i64 -1 + %res = load i32, ptr %addr + ret i32 %res +} + +define i32 @test_addr_mode_i32(ptr %x) { +; CHECK-LABEL: test_addr_mode_i32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i32_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1+-4]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + %addr = getelementptr i32, ptr %x, i32 -1 + %res = load i32, ptr %addr + ret i32 %res +} + +define i32 @test_addr_mode_i16(ptr %x) { +; CHECK-LABEL: test_addr_mode_i16( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i16_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1+-4]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + %addr = getelementptr i32, ptr %x, i16 -1 + %res = load i32, ptr %addr + ret i32 %res +} + +define i32 @test_addr_mode_i8(ptr %x) 
{ +; CHECK-LABEL: test_addr_mode_i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1+-4]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + %addr = getelementptr i32, ptr %x, i8 -1 + %res = load i32, ptr %addr + ret i32 %res +} + +define i32 @test_addr_mode_i64_large(ptr %x) { +; CHECK-LABEL: test_addr_mode_i64_large( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_large_param_0]; +; CHECK-NEXT: add.s64 %rd2, %rd1, 17179869172; +; CHECK-NEXT: ld.u32 %r1, [%rd2]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + %addr = getelementptr i32, ptr %x, i64 4294967293 + %res = load i32, ptr %addr + ret i32 %res +} diff --git a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll index 53b1e49..e1da23e 100644 --- a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll +++ b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll @@ -1,9 +1,13 @@ ; RUN: opt < %s -S -passes=instrprof | FileCheck %s ; RUN: opt < %s -S -passes=instrprof -runtime-counter-relocation | FileCheck -check-prefixes=RELOC %s +; RUN: opt < %s -S -passes=instrprof,inline,gvn -runtime-counter-relocation | FileCheck -check-prefixes=RELOC,RELOCOPT %s +; RUN: opt < %s -S -passes=instrprof -runtime-counter-relocation -instrprof-atomic-counter-update-all | FileCheck -check-prefixes=ATOMIC %s +; RUN: opt < %s -S -passes=instrprof,inline,gvn -runtime-counter-relocation -instrprof-atomic-counter-update-all | FileCheck -check-prefixes=ATOMIC,ATOMICOPT %s target triple = "x86_64-unknown-linux-gnu" @__profn_foo = private constant [3 x i8] c"foo" +@__profn_bar = 
private constant [3 x i8] c"bar" ; RELOC: $__llvm_profile_counter_bias = comdat any ; RELOC: @__llvm_profile_counter_bias = linkonce_odr hidden global i64 0, comdat @@ -12,14 +16,34 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: %[[PGOCOUNTINC:.+]] = add i64 %[[PGOCOUNT]], 1 ; CHECK-NEXT: store i64 %[[PGOCOUNTINC]], ptr @__profc_foo ; RELOC-LABEL: define void @foo -; RELOC-NEXT: %[[BIAS:.+]] = load i64, ptr @__llvm_profile_counter_bias +; RELOC-NEXT: %[[BIAS:.+]] = load i64, ptr @__llvm_profile_counter_bias, align {{[0-9]+}}, !invariant.load !0 ; RELOC-NEXT: %[[PROFC_BIAS:.+]] = add i64 ptrtoint (ptr @__profc_foo to i64), %[[BIAS]] ; RELOC-NEXT: %[[PROFC_ADDR:.+]] = inttoptr i64 %[[PROFC_BIAS]] to ptr ; RELOC-NEXT: %[[PGOCOUNT:.+]] = load i64, ptr %[[PROFC_ADDR]] ; RELOC-NEXT: %[[PGOCOUNTINC:.+]] = add i64 %[[PGOCOUNT]], 1 ; RELOC-NEXT: store i64 %[[PGOCOUNTINC]], ptr %[[PROFC_ADDR]] +; RELOCOPT-NEXT: %[[PROFC_BIAS1:.+]] = add i64 ptrtoint (ptr @__profc_bar to i64), %[[BIAS]] +; RELOCOPT-NEXT: %[[PROFC_ADDR1:.+]] = inttoptr i64 %[[PROFC_BIAS1]] to ptr +; RELOCOPT-NEXT: %[[PGOCOUNT1:.+]] = load i64, ptr %[[PROFC_ADDR1]] +; RELOCOPT-NEXT: %[[PGOCOUNTINC1:.+]] = add i64 %[[PGOCOUNT1]], 1 +; RELOCOPT-NEXT: store i64 %[[PGOCOUNTINC1]], ptr %[[PROFC_ADDR1]] +; ATOMIC-LABEL: define void @foo +; ATOMIC-NEXT: %[[BIAS:.+]] = load i64, ptr @__llvm_profile_counter_bias, align {{[0-9]+}}, !invariant.load !0 +; ATOMIC-NEXT: %[[PROFC_BIAS:.+]] = add i64 ptrtoint (ptr @__profc_foo to i64), %[[BIAS]] +; ATOMIC-NEXT: %[[PROFC_ADDR:.+]] = inttoptr i64 %[[PROFC_BIAS]] to ptr +; ATOMIC-NEXT: %[[PGOCOUNTINC:.+]] = atomicrmw add ptr %[[PROFC_ADDR]], i64 1 monotonic +; ATOMICOPT-NEXT: %[[PROFC_BIAS1:.+]] = add i64 ptrtoint (ptr @__profc_bar to i64), %[[BIAS]] +; ATOMICOPT-NEXT: %[[PROFC_ADDR1:.+]] = inttoptr i64 %[[PROFC_BIAS1]] to ptr +; ATOMICOPT-NEXT: %[[PGOCOUNTINC1:.+]] = atomicrmw add ptr %[[PROFC_ADDR1]], i64 1 monotonic + +define void @bar() { + call void 
@llvm.instrprof.increment(ptr @__profn_bar, i64 0, i32 1, i32 0) + ret void +} + define void @foo() { call void @llvm.instrprof.increment(ptr @__profn_foo, i64 0, i32 1, i32 0) + call void @bar() ret void } diff --git a/llvm/test/MC/AsmParser/directive_abort.s b/llvm/test/MC/AsmParser/directive_abort.s index 86e6267..f4dda22 100644 --- a/llvm/test/MC/AsmParser/directive_abort.s +++ b/llvm/test/MC/AsmParser/directive_abort.s @@ -1,6 +1,9 @@ -# RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t -# RUN: FileCheck -input-file %t %s +// RUN: not llvm-mc -filetype=obj -triple x86_64 %s 2>&1 -o /dev/null | FileCheck %s -# CHECK: error: .abort 'please stop assembing' -TEST0: - .abort please stop assembing +.abort +// CHECK: [[#@LINE-1]]:1: error: .abort detected. Assembly stopping +// CHECK-NEXT: abort + +.abort "abort message" +// CHECK: [[#@LINE-1]]:1: error: .abort '"abort message"' detected. Assembly stopping +// CHECK-NEXT: abort diff --git a/llvm/test/MC/X86/x86-32-coverage.s b/llvm/test/MC/X86/x86-32-coverage.s index fbe2714..5475946 100644 --- a/llvm/test/MC/X86/x86-32-coverage.s +++ b/llvm/test/MC/X86/x86-32-coverage.s @@ -10790,7 +10790,7 @@ btcl $4, (%eax) movdir64b 485498096, %ecx // CHECK: movdir64b 485498096, %cx -// CHECK: # encoding: [0x67,0x66,0x0f,0x38,0xf8,0x0d,0xf0,0x1c,0xf0,0x1c] +// CHECK: # encoding: [0x67,0x66,0x0f,0x38,0xf8,0x0e,0xf0,0x1c] movdir64b 485498096, %cx // CHECK: movdir64b (%edx), %eax @@ -10877,6 +10877,10 @@ enqcmd (%bx,%di), %di // CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f] enqcmd 8128(%bx,%di), %ax +// CHECK: enqcmd 485498096, %cx +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x0e,0xf0,0x1c] +enqcmd 485498096, %cx + // CHECK: enqcmds (%bx,%di), %di // CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x39] enqcmds (%bx,%di), %di @@ -10885,6 +10889,10 @@ enqcmds (%bx,%di), %di // CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f] enqcmds 8128(%bx,%di), %ax +// CHECK: enqcmds 485498096, %cx +// CHECK: encoding: 
[0x67,0xf3,0x0f,0x38,0xf8,0x0e,0xf0,0x1c] +enqcmds 485498096, %cx + // CHECK: serialize // CHECK: encoding: [0x0f,0x01,0xe8] serialize diff --git a/llvm/test/Transforms/HotColdSplit/pr40056.ll b/llvm/test/Transforms/HotColdSplit/pr40056.ll new file mode 100644 index 0000000..950b62c --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/pr40056.ll @@ -0,0 +1,72 @@ +; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s +; Hot cold splitting should not outline: +; 1. Basic blocks with token type instructions +; 2. Functions with scoped EH personality + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.0.0" + +; CHECK-LABEL: define {{.*}}@with_funclet +; CHECK-NOT: with_funclet.cold +define void @with_funclet() personality ptr @__CxxFrameHandler3 { +entry: + invoke void @fYAXXZ() + to label %normal unwind label %exception + +normal: ; preds = %entry + ret void + +exception: ; preds = %entry + %0 = cleanuppad within none [] + call void @terminateYAXXZ() [ "funclet"(token %0) ] + br label %continueexception + +continueexception: ; preds = %exception + ret void +} + +; CHECK-LABEL: define {{.*}}@with_personality +; CHECK-NOT: with_personality.cold +define void @with_personality(i32 %cond) personality ptr @__CxxFrameHandler3 { +entry: + %cond.addr = alloca i32 + store i32 %cond, ptr %cond.addr + %0 = load i32, ptr %cond.addr + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.end2 + +if.then: ; preds = %entry + %1 = load i32, ptr %cond.addr + %cmp = icmp sgt i32 %1, 10 + br i1 %cmp, label %if.then1, label %if.else + +if.then1: ; preds = %if.then + call void @sideeffect(i32 0) + br label %if.end + +if.else: ; preds = %if.then + call void @sideeffect(i32 1) + br label %if.end + +if.end: ; preds = %if.else, %if.then1 + call void (...) @sink() + ret void + +if.end2: ; preds = %entry + call void @sideeffect(i32 2) + ret void +} + +declare i32 @__CxxFrameHandler3(...) 
+ +declare void @fYAXXZ() + +declare void @bar() #0 + +declare void @terminateYAXXZ() + +declare void @sideeffect(i32) + +declare void @sink(...) #0 + +attributes #0 = { cold } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll index 5116a85..a70eafb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll @@ -32,7 +32,6 @@ define i32 @pr70988(ptr %src, i32 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP7:%.*]] = phi ptr [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5]] ; CHECK: pred.load.if4: @@ -42,7 +41,6 @@ define i32 @pr70988(ptr %src, i32 %n) { ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE5]] ; CHECK: pred.load.continue5: -; CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], [[PRED_LOAD_IF4]] ] ; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF4]] ] ; CHECK-NEXT: [[TMP15:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP8]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[TMP16:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP14]], i32 [[VEC_PHI3]]) diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll index 7f258d5..0b8a2d2 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll @@ -40,8 +40,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; 
CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[P1:%.+]]> = ir<%0> -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[P2:%.+]]> = ir<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): for.body.2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll index 42a9ab0..133510f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll @@ -288,9 +288,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]] ; CHECK: [[PRED_UREM_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_UREM_IF]] ] -; CHECK-NEXT: [[TMP16:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP10]], %[[PRED_UREM_IF]] ] ; CHECK-NEXT: [[TMP17:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_UREM_IF]] ] -; CHECK-NEXT: [[TMP18:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_UREM_IF]] ] ; CHECK-NEXT: [[TMP19:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_UREM_IF]] ] ; CHECK-NEXT: [[TMP20:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP14]], %[[PRED_UREM_IF]] ] ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 @@ -307,9 +305,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]] ; CHECK: [[PRED_UREM_CONTINUE2]]: ; CHECK-NEXT: [[TMP30:%.*]] = phi <4 x i64> [ [[TMP15]], %[[PRED_UREM_CONTINUE]] ], [ [[TMP24]], %[[PRED_UREM_IF1]] ] -; CHECK-NEXT: [[TMP31:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP25]], %[[PRED_UREM_IF1]] ] ; CHECK-NEXT: [[TMP32:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP26]], %[[PRED_UREM_IF1]] ] -; CHECK-NEXT: 
[[TMP33:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP27]], %[[PRED_UREM_IF1]] ] ; CHECK-NEXT: [[TMP34:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP28]], %[[PRED_UREM_IF1]] ] ; CHECK-NEXT: [[TMP35:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP29]], %[[PRED_UREM_IF1]] ] ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 @@ -326,9 +322,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]] ; CHECK: [[PRED_UREM_CONTINUE4]]: ; CHECK-NEXT: [[TMP45:%.*]] = phi <4 x i64> [ [[TMP30]], %[[PRED_UREM_CONTINUE2]] ], [ [[TMP39]], %[[PRED_UREM_IF3]] ] -; CHECK-NEXT: [[TMP46:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP40]], %[[PRED_UREM_IF3]] ] ; CHECK-NEXT: [[TMP47:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP41]], %[[PRED_UREM_IF3]] ] -; CHECK-NEXT: [[TMP48:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP42]], %[[PRED_UREM_IF3]] ] ; CHECK-NEXT: [[TMP49:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP43]], %[[PRED_UREM_IF3]] ] ; CHECK-NEXT: [[TMP50:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP44]], %[[PRED_UREM_IF3]] ] ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 @@ -345,9 +339,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]] ; CHECK: [[PRED_UREM_CONTINUE6]]: ; CHECK-NEXT: [[TMP60:%.*]] = phi <4 x i64> [ [[TMP45]], %[[PRED_UREM_CONTINUE4]] ], [ [[TMP54]], %[[PRED_UREM_IF5]] ] -; CHECK-NEXT: [[TMP61:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP55]], %[[PRED_UREM_IF5]] ] ; CHECK-NEXT: [[TMP62:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP56]], %[[PRED_UREM_IF5]] ] -; CHECK-NEXT: [[TMP63:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP57]], %[[PRED_UREM_IF5]] ] ; CHECK-NEXT: [[TMP64:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP58]], 
%[[PRED_UREM_IF5]] ] ; CHECK-NEXT: [[TMP65:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP59]], %[[PRED_UREM_IF5]] ] ; CHECK-NEXT: [[TMP66:%.*]] = extractelement <4 x i64> [[TMP60]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index feaa5fa2..eee1b6f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -238,7 +238,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]] ; CHECK: pred.udiv.continue: -; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]] @@ -250,7 +249,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: -; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ] ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] @@ -314,7 +312,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i32 0 ; SINK-GATHER-NEXT: br label 
[[PRED_UDIV_CONTINUE]] ; SINK-GATHER: pred.udiv.continue: -; SINK-GATHER-NEXT: [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ] ; SINK-GATHER-NEXT: [[TMP8:%.*]] = phi <8 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; SINK-GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 1 ; SINK-GATHER-NEXT: br i1 [[TMP9]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2:%.*]] @@ -326,7 +323,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP13]], i32 1 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; SINK-GATHER: pred.udiv.continue2: -; SINK-GATHER-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ] ; SINK-GATHER-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF1]] ] ; SINK-GATHER-NEXT: [[TMP17:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 2 ; SINK-GATHER-NEXT: br i1 [[TMP17]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] @@ -338,7 +334,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP16]], i32 [[TMP21]], i32 2 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE4]] ; SINK-GATHER: pred.udiv.continue4: -; SINK-GATHER-NEXT: [[TMP23:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE2]] ], [ [[TMP20]], [[PRED_UDIV_IF3]] ] ; SINK-GATHER-NEXT: [[TMP24:%.*]] = phi <8 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE2]] ], [ [[TMP22]], [[PRED_UDIV_IF3]] ] ; SINK-GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 3 ; SINK-GATHER-NEXT: br i1 [[TMP25]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] @@ -350,7 +345,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> 
[[TMP24]], i32 [[TMP29]], i32 3 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE6]] ; SINK-GATHER: pred.udiv.continue6: -; SINK-GATHER-NEXT: [[TMP31:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP28]], [[PRED_UDIV_IF5]] ] ; SINK-GATHER-NEXT: [[TMP32:%.*]] = phi <8 x i32> [ [[TMP24]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP30]], [[PRED_UDIV_IF5]] ] ; SINK-GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 4 ; SINK-GATHER-NEXT: br i1 [[TMP33]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] @@ -362,7 +356,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP38:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP37]], i32 4 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; SINK-GATHER: pred.udiv.continue8: -; SINK-GATHER-NEXT: [[TMP39:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE6]] ], [ [[TMP36]], [[PRED_UDIV_IF7]] ] ; SINK-GATHER-NEXT: [[TMP40:%.*]] = phi <8 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP38]], [[PRED_UDIV_IF7]] ] ; SINK-GATHER-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 5 ; SINK-GATHER-NEXT: br i1 [[TMP41]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]] @@ -374,7 +367,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP46:%.*]] = insertelement <8 x i32> [[TMP40]], i32 [[TMP45]], i32 5 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE10]] ; SINK-GATHER: pred.udiv.continue10: -; SINK-GATHER-NEXT: [[TMP47:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE8]] ], [ [[TMP44]], [[PRED_UDIV_IF9]] ] ; SINK-GATHER-NEXT: [[TMP48:%.*]] = phi <8 x i32> [ [[TMP40]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP46]], [[PRED_UDIV_IF9]] ] ; SINK-GATHER-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 6 ; SINK-GATHER-NEXT: br i1 [[TMP49]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]] @@ -386,7 +378,6 @@ define i32 
@scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP54:%.*]] = insertelement <8 x i32> [[TMP48]], i32 [[TMP53]], i32 6 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE12]] ; SINK-GATHER: pred.udiv.continue12: -; SINK-GATHER-NEXT: [[TMP55:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE10]] ], [ [[TMP52]], [[PRED_UDIV_IF11]] ] ; SINK-GATHER-NEXT: [[TMP56:%.*]] = phi <8 x i32> [ [[TMP48]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP54]], [[PRED_UDIV_IF11]] ] ; SINK-GATHER-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 7 ; SINK-GATHER-NEXT: br i1 [[TMP57]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14]] @@ -398,7 +389,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP56]], i32 [[TMP61]], i32 7 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE14]] ; SINK-GATHER: pred.udiv.continue14: -; SINK-GATHER-NEXT: [[TMP63:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE12]] ], [ [[TMP60]], [[PRED_UDIV_IF13]] ] ; SINK-GATHER-NEXT: [[TMP64:%.*]] = phi <8 x i32> [ [[TMP56]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP62]], [[PRED_UDIV_IF13]] ] ; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] ; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index bff730f..048b670 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -60,7 +60,6 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: 
PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2 @@ -143,7 +142,6 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 @@ -332,8 +330,6 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED1:%.+]]> = ir<%rem> -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%lv.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.3 @@ -426,8 +422,6 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem> -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%rem.div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.3 @@ -510,7 +504,6 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[P_VAL:%.+]]> = ir<%val> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index eade22f..ecb57c5 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -388,7 +388,6 @@ define void @test_scalar2scalar(ptr 
nocapture %asd, ptr nocapture %bsd) { ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 ; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE]] ; CHECK: pred.sdiv.continue: -; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_SDIV_IF]] ] ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_SDIV_IF]] ] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_SDIV_IF3:%.*]], label [[PRED_SDIV_CONTINUE4]] @@ -401,7 +400,6 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP21]], i32 1 ; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE4]] ; CHECK: pred.sdiv.continue4: -; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP19]], [[PRED_SDIV_IF3]] ] ; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ [[TMP15]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP22]], [[PRED_SDIV_IF3]] ] ; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP5]], <2 x i32> [[TMP24]] @@ -466,7 +464,6 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = sdiv i32 [[TMP8]], [[TMP14]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE]] ; UNROLL-NO-VF: pred.sdiv.continue: -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_SDIV_IF]] ] ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP15]], [[PRED_SDIV_IF]] ] ; UNROLL-NO-VF-NEXT: br i1 [[TMP13]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.if2: @@ -474,7 +471,6 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = sdiv i32 [[TMP9]], [[TMP18]] ; UNROLL-NO-VF-NEXT: br label 
[[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.continue3: -; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP18]], [[PRED_SDIV_IF2]] ] ; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP19]], [[PRED_SDIV_IF2]] ] ; UNROLL-NO-VF-NEXT: [[TMP22:%.*]] = xor i1 [[TMP12]], true ; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = xor i1 [[TMP13]], true @@ -577,7 +573,6 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i32 0 ; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE]] ; CHECK: pred.sdiv.continue: -; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_SDIV_IF]] ] ; CHECK-NEXT: [[TMP19:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP17]], [[PRED_SDIV_IF]] ] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_SDIV_IF3:%.*]], label [[PRED_SDIV_CONTINUE4]] @@ -590,7 +585,6 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP25]], i32 1 ; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE4]] ; CHECK: pred.sdiv.continue4: -; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP23]], [[PRED_SDIV_IF3]] ] ; CHECK-NEXT: [[TMP28:%.*]] = phi <2 x i32> [ [[TMP19]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP26]], [[PRED_SDIV_IF3]] ] ; CHECK-NEXT: [[TMP29:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>, !dbg [[DBG35]] ; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP29]], <2 x i1> zeroinitializer, !dbg [[DBG35]] @@ -666,7 +660,6 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = sdiv i32 [[TMP8]], [[TMP22]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE]] ; UNROLL-NO-VF: pred.sdiv.continue: -; UNROLL-NO-VF-NEXT: [[TMP24:%.*]] = phi 
i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP22]], [[PRED_SDIV_IF]] ] ; UNROLL-NO-VF-NEXT: [[TMP25:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP23]], [[PRED_SDIV_IF]] ] ; UNROLL-NO-VF-NEXT: br i1 [[TMP21]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.if2: @@ -674,7 +667,6 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; UNROLL-NO-VF-NEXT: [[TMP27:%.*]] = sdiv i32 [[TMP9]], [[TMP26]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.continue3: -; UNROLL-NO-VF-NEXT: [[TMP28:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP26]], [[PRED_SDIV_IF2]] ] ; UNROLL-NO-VF-NEXT: [[TMP29:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP27]], [[PRED_SDIV_IF2]] ] ; UNROLL-NO-VF-NEXT: [[TMP30:%.*]] = xor i1 [[TMP14]], true, !dbg [[DBG35]] ; UNROLL-NO-VF-NEXT: [[TMP31:%.*]] = xor i1 [[TMP15]], true, !dbg [[DBG35]] diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 7ed42ed..2503520 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -96,7 +96,6 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Successor(s): pred.store.continue ; DBG-EMPTY: ; DBG-NEXT: pred.store.continue: -; DBG-NEXT: PHI-PREDICATED-INSTRUCTION vp<{{.+}}> = ir<%l> ; DBG-NEXT: No successors ; DBG-NEXT: } ; DBG-NEXT: Successor(s): cond.false.1 @@ -137,7 +136,6 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) { ; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 ; CHECK-NEXT: br label %pred.store.continue ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ poison, %vector.body ], [ [[TMP4]], %pred.store.if ] ; CHECK-NEXT: br i1 [[INDUCTION3]], label %pred.store.if4, label %pred.store.continue5 ; CHECK: pred.store.if4: ; CHECK-NEXT: [[INDUCTION5:%.*]] 
= add i64 [[INDEX]], 1 @@ -147,7 +145,6 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) { ; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP2]], align 4 ; CHECK-NEXT: br label %pred.store.continue5 ; CHECK: pred.store.continue5: -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ poison, %pred.store.continue ], [ [[TMP7]], %pred.store.if4 ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index f05ec30..7c23b60 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -224,7 +224,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: store i64 [[TMP4]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_STORE_IF]] ] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] ; CHECK: pred.store.if1: @@ -234,7 +233,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: store i64 [[TMP9]], ptr [[B]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP9]], [[PRED_STORE_IF1]] ] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: @@ -244,7 +242,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: store i64 [[TMP14]], ptr [[B]], 
align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP14]], [[PRED_STORE_IF3]] ] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.if5: @@ -254,7 +251,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: store i64 [[TMP19]], ptr [[B]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP20:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP19]], [[PRED_STORE_IF5]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 @@ -295,7 +291,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: store i64 [[TMP5]], ptr [[B:%.*]], align 8 ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF2UF2: pred.store.continue: -; VF2UF2-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ] ; VF2UF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; VF2UF2-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] ; VF2UF2: pred.store.if2: @@ -305,7 +300,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: store i64 [[TMP10]], ptr [[B]], align 8 ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; VF2UF2: pred.store.continue3: -; VF2UF2-NEXT: [[TMP11:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP10]], [[PRED_STORE_IF2]] ] ; VF2UF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; VF2UF2-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; VF2UF2: pred.store.if4: @@ -315,7 +309,6 @@ define void @load_variant(ptr noalias 
%a, ptr noalias %b) { ; VF2UF2-NEXT: store i64 [[TMP15]], ptr [[B]], align 8 ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE5]] ; VF2UF2: pred.store.continue5: -; VF2UF2-NEXT: [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE3]] ], [ [[TMP15]], [[PRED_STORE_IF4]] ] ; VF2UF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 ; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.if6: @@ -325,7 +318,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: store i64 [[TMP20]], ptr [[B]], align 8 ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.continue7: -; VF2UF2-NEXT: [[TMP21:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE5]] ], [ [[TMP20]], [[PRED_STORE_IF6]] ] ; VF2UF2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2> ; VF2UF2-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 @@ -368,7 +360,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: store i64 [[TMP9]], ptr [[B:%.*]], align 8 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF1UF4: pred.store.continue: -; VF1UF4-NEXT: [[TMP10:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ] ; VF1UF4-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] ; VF1UF4: pred.store.if1: ; VF1UF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] @@ -376,7 +367,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: store i64 [[TMP12]], ptr [[B]], align 8 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE2]] ; VF1UF4: pred.store.continue2: -; VF1UF4-NEXT: [[TMP13:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP12]], [[PRED_STORE_IF1]] ] ; VF1UF4-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; VF1UF4: pred.store.if3: ; VF1UF4-NEXT: [[TMP14:%.*]] = getelementptr 
inbounds i64, ptr [[A]], i64 [[TMP2]] @@ -384,7 +374,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: store i64 [[TMP15]], ptr [[B]], align 8 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE4]] ; VF1UF4: pred.store.continue4: -; VF1UF4-NEXT: [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ] ; VF1UF4-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] ; VF1UF4: pred.store.if5: ; VF1UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] @@ -392,7 +381,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: store i64 [[TMP18]], ptr [[B]], align 8 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]] ; VF1UF4: pred.store.continue6: -; VF1UF4-NEXT: [[TMP19:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP18]], [[PRED_STORE_IF5]] ] ; VF1UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; VF1UF4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index 8983c80..9eb9009 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -510,7 +510,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK-VF4-IC1: pred.store.continue: -; CHECK-VF4-IC1-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_STORE_IF]] ] ; CHECK-VF4-IC1-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label 
[[PRED_STORE_CONTINUE4:%.*]] ; CHECK-VF4-IC1: pred.store.if3: @@ -521,7 +520,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK-VF4-IC1: pred.store.continue4: -; CHECK-VF4-IC1-NEXT: [[TMP18:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP16]], [[PRED_STORE_IF3]] ] ; CHECK-VF4-IC1-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK-VF4-IC1: pred.store.if5: @@ -532,7 +530,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: store i32 [[TMP23]], ptr [[TMP21]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK-VF4-IC1: pred.store.continue6: -; CHECK-VF4-IC1-NEXT: [[TMP24:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP22]], [[PRED_STORE_IF5]] ] ; CHECK-VF4-IC1-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]] ; CHECK-VF4-IC1: pred.store.if7: @@ -543,7 +540,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK-VF4-IC1: pred.store.continue8: -; CHECK-VF4-IC1-NEXT: [[TMP30:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP28]], [[PRED_STORE_IF7]] ] ; CHECK-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF4-IC1-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4-IC1-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP11:![0-9]+]] @@ -636,7 +632,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK-VF4-IC2: pred.store.continue: -; CHECK-VF4-IC2-NEXT: [[TMP19:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP17]], [[PRED_STORE_IF]] ] ; CHECK-VF4-IC2-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] ; CHECK-VF4-IC2: pred.store.if6: @@ -647,7 +642,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: br label [[PRED_STORE_CONTINUE7]] ; CHECK-VF4-IC2: pred.store.continue7: -; CHECK-VF4-IC2-NEXT: [[TMP25:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP23]], [[PRED_STORE_IF6]] ] ; CHECK-VF4-IC2-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] ; CHECK-VF4-IC2: pred.store.if8: @@ -658,7 +652,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: store i32 [[TMP30]], ptr [[TMP28]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: br label [[PRED_STORE_CONTINUE9]] ; CHECK-VF4-IC2: pred.store.continue9: -; CHECK-VF4-IC2-NEXT: [[TMP31:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE7]] ], [ [[TMP29]], [[PRED_STORE_IF8]] ] ; CHECK-VF4-IC2-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP32]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] ; CHECK-VF4-IC2: pred.store.if10: @@ -669,7 +662,6 @@ define i32 
@multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: store i32 [[TMP36]], ptr [[TMP34]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: br label [[PRED_STORE_CONTINUE11]] ; CHECK-VF4-IC2: pred.store.continue11: -; CHECK-VF4-IC2-NEXT: [[TMP37:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE9]] ], [ [[TMP35]], [[PRED_STORE_IF10]] ] ; CHECK-VF4-IC2-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] ; CHECK-VF4-IC2: pred.store.if12: @@ -679,7 +671,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: store i32 [[TMP41]], ptr [[TMP39]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: br label [[PRED_STORE_CONTINUE13]] ; CHECK-VF4-IC2: pred.store.continue13: -; CHECK-VF4-IC2-NEXT: [[TMP42:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE11]] ], [ [[TMP40]], [[PRED_STORE_IF12]] ] ; CHECK-VF4-IC2-NEXT: [[TMP43:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP43]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] ; CHECK-VF4-IC2: pred.store.if14: @@ -690,7 +681,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: store i32 [[TMP47]], ptr [[TMP45]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: br label [[PRED_STORE_CONTINUE15]] ; CHECK-VF4-IC2: pred.store.continue15: -; CHECK-VF4-IC2-NEXT: [[TMP48:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE13]] ], [ [[TMP46]], [[PRED_STORE_IF14]] ] ; CHECK-VF4-IC2-NEXT: [[TMP49:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] ; CHECK-VF4-IC2: pred.store.if16: @@ -701,7 +691,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 
noundef %n) { ; CHECK-VF4-IC2-NEXT: store i32 [[TMP53]], ptr [[TMP51]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: br label [[PRED_STORE_CONTINUE17]] ; CHECK-VF4-IC2: pred.store.continue17: -; CHECK-VF4-IC2-NEXT: [[TMP54:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE15]] ], [ [[TMP52]], [[PRED_STORE_IF16]] ] ; CHECK-VF4-IC2-NEXT: [[TMP55:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP55]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]] ; CHECK-VF4-IC2: pred.store.if18: @@ -712,7 +701,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: store i32 [[TMP59]], ptr [[TMP57]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: br label [[PRED_STORE_CONTINUE19]] ; CHECK-VF4-IC2: pred.store.continue19: -; CHECK-VF4-IC2-NEXT: [[TMP60:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE17]] ], [ [[TMP58]], [[PRED_STORE_IF18]] ] ; CHECK-VF4-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-VF4-IC2-NEXT: [[TMP61:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4-IC2-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] @@ -804,7 +792,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF1-IC2-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK-VF1-IC2: pred.store.continue: -; CHECK-VF1-IC2-NEXT: [[TMP18:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP16]], [[PRED_STORE_IF]] ] ; CHECK-VF1-IC2-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] ; CHECK-VF1-IC2: pred.store.if5: ; CHECK-VF1-IC2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]] @@ -813,7 +800,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; 
CHECK-VF1-IC2-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF1-IC2-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK-VF1-IC2: pred.store.continue6: -; CHECK-VF1-IC2-NEXT: [[TMP22:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP20]], [[PRED_STORE_IF5]] ] ; CHECK-VF1-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-VF1-IC2-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF1-IC2-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 313be09..9c07281 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -46,7 +46,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%lv.b> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -768,8 +767,6 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED1:%.+]]> = ir<%lv.a> -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2 @@ -854,7 +851,6 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2 @@ -914,7 +910,6 @@ define void 
@update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) { ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%l1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.then.1 @@ -1053,7 +1048,6 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[P_LOAD:%.+]]> = ir<%l> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index c915b9a..16e9e5e 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -397,3 +397,27 @@ entry: %ptr2 = getelementptr inbounds %struct0, ptr %ptr, i65 1, i32 3, i64 %idx, i32 1 ret ptr %ptr2 } + +; Do not extract large constant offset that cannot be folded in to PTX +; addressing mode +define void @large_offset(ptr %out, i32 %in) { +; CHECK-LABEL: define void @large_offset( +; CHECK-SAME: ptr [[OUT:%.*]], i32 [[IN:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[TMP0]], 536870912 +; CHECK-NEXT: [[IDX:%.*]] = zext nneg i32 [[ADD]] to i64 +; CHECK-NEXT: [[GETELEM:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDX]] +; CHECK-NEXT: store i32 [[IN]], ptr [[GETELEM]], align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %add = add nuw nsw i32 %0, 536870912 + %idx = zext nneg i32 %add to i64 + %getElem = getelementptr inbounds i32, ptr %out, i64 %idx + store i32 %in, ptr %getElem, align 4 + ret void +} + 
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() diff --git a/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin Binary files differnew file mode 100755 index 0000000..7a15430 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin diff --git a/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfscript b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfscript new file mode 100644 index 0000000..3d29d44 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfscript @@ -0,0 +1,39 @@ + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 
0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/29//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- +br_misp_retired.all_branches:upp: 4012fa 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/24//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 
0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 
0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/3//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- +br_misp_retired.all_branches:upp: 4012fa 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/24//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 
0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/24//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 
0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/6//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/21//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/14//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- +br_misp_retired.all_branches:upp: 4012fa 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 
0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/5//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/2//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 
0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- +br_misp_retired.all_branches:upp: 4012fa 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/2//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/2//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 
0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/27//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/29//- 0x401310/0x4012f0/P/-/-/24//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/19//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 
0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/29//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 
0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- +br_misp_retired.all_branches:upp: 4012fa 0x401310/0x4012f0/P/-/-/13//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/23//- 
0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 
0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- +br_misp_retired.all_branches:upp: 4012fa 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/20//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/34//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- + br_inst_retired.near_taken:upp: 4012fa 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/3//- 
0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/17//- 0x401310/0x4012f0/P/-/-/9//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 
0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- +br_misp_retired.all_branches:upp: 4012fa 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/29//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/6//- 0x401310/0x4012f0/P/-/-/24//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 
0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/28//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/15//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/9//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 
0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/17//- 0x4012fa/0x4012ff/M/-/-/1//- +br_misp_retired.all_branches:upp: 4012fa 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 
0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/4//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/19//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/10//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/7//- 
0x4012fa/0x4012ff/M/-/-/1//- +br_misp_retired.all_branches:upp: 4012fa 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/21//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/24//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- + br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/2//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/5//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/1//- 0x401310/0x4012f0/P/-/-/1//- + 
br_inst_retired.near_taken:upp: 401310 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/3//- 0x401310/0x4012f0/P/-/-/23//- 0x4012fa/0x4012ff/M/-/-/4//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/2//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/27//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/25//- 0x401310/0x4012f0/P/-/-/4//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/3//- 0x401310/0x4012f0/P/-/-/1//- 0x4012fa/0x4012ff/P/-/-/1//- 0x401310/0x4012f0/P/-/-/28//- 0x401310/0x4012f0/P/-/-/22//- 0x4012fa/0x4012ff/M/-/-/2//- 0x401310/0x4012f0/P/-/-/26//- 0x401310/0x4012f0/P/-/-/28//- diff --git a/llvm/test/tools/llvm-profgen/Inputs/ip-duplication.perfscript b/llvm/test/tools/llvm-profgen/Inputs/ip-duplication.perfscript new file mode 100644 index 0000000..f0d4efc --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/ip-duplication.perfscript @@ -0,0 +1,2 @@ + 4006b7 0x4006b7/0x40068b/P/-/-/1 0x4006c8/0x4006b0/P/-/-/1 0x400689/0x4006b9/P/-/-/1 0x40066d/0x400686/P/-/-/2 0x4007a6/0x400650/P/-/-/9 0x4007ca/0x400790/P/-/-/8 0x4007d7/0x4007bd/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007b8/0x400790/P/-/-/2 0x4006a2/0x4007a8/P/-/-/3 + 40065d 40065d/0x40068f/M/-/-/1 diff --git a/llvm/test/tools/llvm-profgen/Inputs/noprobe-skid.perfscript b/llvm/test/tools/llvm-profgen/Inputs/noprobe-skid.perfscript new file mode 100644 index 0000000..7c70a18 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/noprobe-skid.perfscript @@ -0,0 +1,5 @@ +// Invalid perf line + 40062f 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/6 
0x40062f/0x4005b0/P/-/-/16 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/6 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/10 0x40062f/0x4005b0/P/-/-/14 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/7 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/15 0x400645/0x4005ff/P/-/-/1 + 4005d7 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/11 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/11 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/13 0x40062f/0x4005b0/P/-/-/9 + 4005c8 0x4005c8/0x4005dc/P/-/-/11 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 
0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/8 + 4005c5 0x4005c8/0x4005dc/P/-/-/11 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/8 diff --git a/llvm/test/tools/llvm-profgen/event-filtering.test b/llvm/test/tools/llvm-profgen/event-filtering.test new file mode 100644 index 0000000..ea486a8 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/event-filtering.test @@ -0,0 +1,78 @@ +// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_inst_retired.near_taken:upp +// RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE +// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_inst_retired.near_taken:upp +// RUN: FileCheck %s --input-file %t --check-prefix=CHECK + +// 
RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_misp_retired.all_branches:upp --leading-ip-only +// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED-RAW-PROFILE +// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_misp_retired.all_branches:upp --leading-ip-only +// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED + +// Check that we can use perf event filtering to generate multiple types of +// source-level profiles from a single perf profile. In this case, we generate +// a typical execution frequency profile using br_inst_retired.near_taken LBRs, +// and a branch mispredict profile using br_misp_retired.all_branches sample +// IPs. + +// The source example below is based on perfKernelCpp/cmov_3, except a +// misleading builtin is used to persuade the compiler not to use cmov, which +// induces branch mispredicts. 
+ +// CHECK: sel_arr:20229:0 +// CHECK: 3.1: 627 +// CHECK: 3.2: 627 +// CHECK: 4: 615 +// CHECK: 5: 627 + +// UNPRED: sel_arr:18:0 +// UNPRED: 3.1: 0 +// UNPRED: 3.2: 0 +// UNPRED: 4: 9 +// UNPRED: 5: 0 + +// CHECK-RAW-PROFILE: 3 +// CHECK-RAW-PROFILE-NEXT: 2f0-2fa:303 +// CHECK-RAW-PROFILE-NEXT: 2f0-310:312 +// CHECK-RAW-PROFILE-NEXT: 2ff-310:315 + +// UNPRED-RAW-PROFILE: 1 +// UNPRED-RAW-PROFILE-NEXT: 2fa-2fa:9 + +// original code: +// clang -O2 -gline-tables-only -fdebug-info-for-profiling lit.c +#include <stdlib.h> + +#define N 20000 +#define ITERS 10000 + +static int *m_s1, *m_s2, *m_s3, *m_dst; + +void init(void) { + m_s1 = malloc(sizeof(int)*N); + m_s2 = malloc(sizeof(int)*N); + m_s3 = malloc(sizeof(int)*N); + m_dst = malloc(sizeof(int)*N); + srand(42); + + for (int i = 0; i < N; i++) { + m_s1[i] = rand() % N; + m_s2[i] = 0; + m_s3[i] = 1; + } +} + +void __attribute__((noinline)) sel_arr(int *dst, int *s1, int *s2, int *s3) { +#pragma nounroll +#pragma clang loop vectorize(disable) interleave(disable) + for (int i = 0; i < N; i++) { + int *p = __builtin_expect((s1[i] < 10035), 0) ? &s2[i] : &s3[i]; + dst[i] = *p; + } +} + +int main(void) { + init(); + for(int i=0; i<ITERS; ++i) + sel_arr(m_dst, m_s1, m_s2, m_s3); + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/iponly-nodupfactor.test b/llvm/test/tools/llvm-profgen/iponly-nodupfactor.test new file mode 100644 index 0000000..006b1c4 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/iponly-nodupfactor.test @@ -0,0 +1,22 @@ +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/ip-duplication.perfscript --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t --use-offset=0 --leading-ip-only +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK + +; Test that we don't over-count samples for duplicated source code when +; building an IP-based profile. 
+ +; The inline-noprobe2.perfbin binary is used for this test because one of the +; partition_pivot_last+3.1 debug locations has a duplication factor of 2 +; encoded into its discriminator. In IP-sample mode, a hit in one instruction +; in the duplicated code does not imply a hit to the other duplicates. + +; The perfscript input includes 1 sample at a location with duplication factor +; of 2, and another sample at the same source location but with no duplication +; factor. These should be summed without duplication factors. Ensure we record +; a count of 1+1=2 (and not 2+1=3) for the 3.1 location. + +;CHECK-LABEL: partition_pivot_last +;CHECK-NEXT: 1: 0 +;CHECK-NEXT: 2: 0 +;CHECK-NEXT: 3: 0 +;CHECK-NEXT: 3.1: 2 + diff --git a/llvm/test/tools/llvm-profgen/iponly.test b/llvm/test/tools/llvm-profgen/iponly.test new file mode 100644 index 0000000..2e81798 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/iponly.test @@ -0,0 +1,58 @@ +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe-skid.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --skip-symbolization --leading-ip-only +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe-skid.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --leading-ip-only +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK + +; Here we check the ability to ignore LBRs, which is useful for generating +; profiles where only the precise PMU sample IP is of interest. In general the +; IPs need not identify a branch. In this case there are exactly 4 samples, so +; we see only these 4 locations as "hot" and none of the LBR history. +; Compare with noinline-noprobe.test, which includes LBR history. + +; Note that there are two different IPs (5c5 and 5c8) contributing to line +; offset 1 in bar. 
This tests that sample counts corresponding to the same +; debug location are summed into that location in the profile rather than the +; maximum being taken, as happens with basic block execution count profiles. + +;CHECK: bar:14:0 +;CHECK: 0: 0 +;CHECK: 1: 2 +;CHECK: 2: 1 +;CHECK: 4: 0 +;CHECK: 5: 0 +;CHECK: foo:5:0 +;CHECK: 0: 0 +;CHECK: 1: 0 +;CHECK: 2: 0 +;CHECK: 3: 1 +;CHECK: 4: 0 +;CHECK: 5: 0 + +CHECK-RAW-PROFILE: 4 +CHECK-RAW-PROFILE-NEXT: 5c5-5c5:1 +CHECK-RAW-PROFILE-NEXT: 5c8-5c8:1 +CHECK-RAW-PROFILE-NEXT: 5d7-5d7:1 +CHECK-RAW-PROFILE-NEXT: 62f-62f:1 + +; original code: +; clang -O3 -g -fdebug-info-for-profiling test.c -fno-inline -o a.out +#include <stdio.h> + +int bar(int x, int y) { + if (x % 3) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 91) s = bar(i, s); else s += 30; + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index 18f4f1a..60a89cb 100644 --- a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -266,9 +266,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { std::unique_ptr<MCStreamer> MS(TheTarget->createMCObjectStreamer( *ErrOrTriple, MC, std::unique_ptr<MCAsmBackend>(MAB), - MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(MCE), *MSTI, - MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false)); + MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(MCE), + *MSTI)); if (!MS) return error("no object streamer for target " + TripleName, Context); diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp index 6ec19a3..ef9d0f3 100644 --- a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp +++ b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp @@ -233,9 +233,8 @@ 
int AssembleOneInput(const uint8_t *Data, size_t Size) { MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); Str.reset(TheTarget->createMCObjectStreamer( TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB), - MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), *STI, - MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false)); + MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), + *STI)); } const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, *MCII, MCOptions); diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp index de999a4..0f1e330 100644 --- a/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/llvm/tools/llvm-mc/llvm-mc.cpp @@ -356,6 +356,9 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); MCOptions.CompressDebugSections = CompressDebugSections.getValue(); + MCOptions.ShowMCInst = ShowInst; + MCOptions.AsmVerbose = true; + MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory; setDwarfDebugFlags(argc, argv); setDwarfDebugProducer(); @@ -555,9 +558,7 @@ int main(int argc, char **argv) { TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB), DwoOut ? 
MAB->createDwoObjectWriter(*OS, DwoOut->os()) : MAB->createObjectWriter(*OS), - std::unique_ptr<MCCodeEmitter>(CE), *STI, MCOptions.MCRelaxAll, - MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false)); + std::unique_ptr<MCCodeEmitter>(CE), *STI)); if (NoExecStack) Str->initSections(true, *STI); } diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index 24643bd..bcfec97 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -264,6 +264,7 @@ int llvm_ml_main(int Argc, char **Argv, const llvm::ToolContext &) { MCOptions.AssemblyLanguage = "masm"; MCOptions.MCFatalWarnings = InputArgs.hasArg(OPT_fatal_warnings); MCOptions.MCSaveTempLabels = InputArgs.hasArg(OPT_save_temp_labels); + MCOptions.AsmVerbose = true; Triple TheTriple = GetTriple(ProgName, InputArgs); std::string Error; @@ -402,9 +403,8 @@ int llvm_ml_main(int Argc, char **Argv, const llvm::ToolContext &) { MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); Str.reset(TheTarget->createMCObjectStreamer( TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB), - MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), *STI, - MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false)); + MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), + *STI)); } else { llvm_unreachable("Invalid file type!"); } diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index 111c546..b4e4911 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -41,6 +41,17 @@ static cl::opt<bool> "and produce context-insensitive profile.")); cl::opt<bool> ShowDetailedWarning("show-detailed-warning", cl::desc("Show detailed warning message.")); +cl::opt<bool> + LeadingIPOnly("leading-ip-only", + cl::desc("Form a profile based only on sample IPs")); + +static cl::list<std::string> PerfEventFilter( + "perf-event", + 
cl::desc("Ignore samples not matching the given event names")); +static cl::alias + PerfEventFilterPlural("perf-events", cl::CommaSeparated, + cl::desc("Comma-delimited version of -perf-event"), + cl::aliasopt(PerfEventFilter)); extern cl::opt<std::string> PerfTraceFilename; extern cl::opt<bool> ShowDisassemblyOnly; @@ -404,13 +415,18 @@ PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID, } } + // If filtering by events was requested, additionally request the "event" + // field. + const std::string FieldList = + PerfEventFilter.empty() ? "ip,brstack" : "event,ip,brstack"; + // Run perf script again to retrieve events for PIDs collected above SmallVector<StringRef, 8> ScriptSampleArgs; ScriptSampleArgs.push_back(PerfPath); ScriptSampleArgs.push_back("script"); ScriptSampleArgs.push_back("--show-mmap-events"); ScriptSampleArgs.push_back("-F"); - ScriptSampleArgs.push_back("ip,brstack"); + ScriptSampleArgs.push_back(FieldList); ScriptSampleArgs.push_back("-i"); ScriptSampleArgs.push_back(PerfData); if (!PIDs.empty()) { @@ -575,14 +591,54 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, // Skip the leading instruction pointer. size_t Index = 0; + + StringRef EventName; + // Skip a perf event name. This may or may not exist. 
+ if (Records.size() > Index && Records[Index].ends_with(":")) { + EventName = Records[Index].ltrim().rtrim(':'); + Index++; + + if (PerfEventFilter.empty()) { + WithColor::warning() << "No --perf-event filter was specified, but an " + "\"event\" field was found in line " + << TraceIt.getLineNumber() << ": " + << TraceIt.getCurrentLine() << "\n"; + } else if (std::find(PerfEventFilter.begin(), PerfEventFilter.end(), + EventName) == PerfEventFilter.end()) { + TraceIt.advance(); + return false; + } + + } else if (!PerfEventFilter.empty()) { + WithColor::warning() << "A --perf-event filter was specified, but no " + "\"event\" field found in line " + << TraceIt.getLineNumber() << ": " + << TraceIt.getCurrentLine() << "\n"; + } + uint64_t LeadingAddr; - if (!Records.empty() && !Records[0].contains('/')) { - if (Records[0].getAsInteger(16, LeadingAddr)) { + if (Records.size() > Index && !Records[Index].contains('/')) { + if (Records[Index].getAsInteger(16, LeadingAddr)) { WarnInvalidLBR(TraceIt); TraceIt.advance(); return false; } - Index = 1; + Index++; + } + + // We assume that if we saw an event name we also saw a leading addr. + // In other words, LeadingAddr is set if Index is 1 or 2. + if (LeadingIPOnly && Index > 0) { + // Form a profile only from the sample IP. Do not assume an LBR stack + // follows, and ignore it if it does. + uint64_t SampleIP = Binary->canonicalizeVirtualAddress(LeadingAddr); + bool SampleIPIsInternal = Binary->addressIsCode(SampleIP); + if (SampleIPIsInternal) { + // Form a half LBR entry where the sample IP is the destination. 
+ LBRStack.emplace_back(LBREntry(SampleIP, SampleIP)); + } + TraceIt.advance(); + return !LBRStack.empty(); } // Now extract LBR samples - note that we do not reverse the @@ -902,6 +958,20 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat) { SampleCounter &Counter = SampleCounters.begin()->second; uint64_t EndAddress = 0; + + if (LeadingIPOnly) { + assert(Sample->LBRStack.size() == 1 && + "Expected only half LBR entries for ip-only mode"); + const LBREntry &LBR = *(Sample->LBRStack.begin()); + uint64_t SourceAddress = LBR.Source; + uint64_t TargetAddress = LBR.Target; + if (SourceAddress == TargetAddress && + Binary->addressIsCode(TargetAddress)) { + Counter.recordRangeCount(SourceAddress, TargetAddress, Repeat); + } + return; + } + for (const LBREntry &LBR : Sample->LBRStack) { uint64_t SourceAddress = LBR.Source; uint64_t TargetAddress = LBR.Target; @@ -1062,6 +1132,18 @@ bool PerfScriptReader::isLBRSample(StringRef Line) { Line.trim().split(Records, " ", 2, false); if (Records.size() < 2) return false; + // Check if there is an event name before the leading IP. + // If there is, it will be in Records[0]. To skip it, we'll re-split on + // Records[1], which should contain the rest of the line. + if (Records[0].contains(":")) { + // If so, consume the event name and continue processing the rest of the + // line. 
+ StringRef IPAndLBR = Records[1].ltrim(); + Records.clear(); + IPAndLBR.split(Records, " ", 2, false); + if (Records.size() < 2) + return false; + } if (Records[1].starts_with("0x") && Records[1].contains('/')) return true; return false; @@ -1152,6 +1234,18 @@ void PerfScriptReader::warnInvalidRange() { const PerfSample *Sample = Item.first.getPtr(); uint64_t Count = Item.second; uint64_t EndAddress = 0; + + if (LeadingIPOnly) { + assert(Sample->LBRStack.size() == 1 && + "Expected only half LBR entries for ip-only mode"); + const LBREntry &LBR = *(Sample->LBRStack.begin()); + if (LBR.Source == LBR.Target && LBR.Source != ExternalAddr) { + // This is an leading-addr-only profile. + Ranges[{LBR.Source, LBR.Source}] += Count; + } + continue; + } + for (const LBREntry &LBR : Sample->LBRStack) { uint64_t SourceAddress = LBR.Source; uint64_t StartAddress = LBR.Target; @@ -1199,11 +1293,15 @@ void PerfScriptReader::warnInvalidRange() { !Binary->addressIsCode(EndAddress)) continue; - if (!Binary->addressIsCode(StartAddress) || - !Binary->addressIsTransfer(EndAddress)) { - InstNotBoundary += I.second; - WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg); - } + // IP samples can indicate activity on individual instructions rather than + // basic blocks/edges. In this mode, don't warn if sampled IPs aren't + // branches. 
+ if (!LeadingIPOnly) + if (!Binary->addressIsCode(StartAddress) || + !Binary->addressIsTransfer(EndAddress)) { + InstNotBoundary += I.second; + WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg); + } auto *FRange = Binary->findFuncRange(StartAddress); if (!FRange) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 53a25b2..175556c 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -104,6 +104,8 @@ cl::opt<bool> InferMissingFrames( "Infer missing call frames due to compiler tail call elimination."), llvm::cl::Optional); +extern cl::opt<bool> LeadingIPOnly; + using namespace llvm; using namespace sampleprof; @@ -388,18 +390,25 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( // Use the maximum count of samples with same line location uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); - // Use duplication factor to compensated for loop unroll/vectorization. - // Note that this is only needed when we're taking MAX of the counts at - // the location instead of SUM. - Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); - - ErrorOr<uint64_t> R = - FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); - - uint64_t PreviousCount = R ? R.get() : 0; - if (PreviousCount <= Count) { + if (LeadingIPOnly) { + // When computing an IP-based profile we take the SUM of counts at the + // location instead of applying duplication factors and taking the MAX. FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, - Count - PreviousCount); + Count); + } else { + // Otherwise, use duplication factor to compensate for loop + // unroll/vectorization. Note that this is only needed when we're taking + // MAX of the counts at the location instead of SUM. 
+ Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); + + ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt( + LeafLoc.Location.LineOffset, Discriminator); + + uint64_t PreviousCount = R ? R.get() : 0; + if (PreviousCount <= Count) { + FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, + Count - PreviousCount); + } } } diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp index 43fdf5d..ec9c0dd 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp @@ -112,10 +112,7 @@ DWARFExpressionCopyBytesTest::createStreamer(raw_pwrite_stream &OS) { std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS); Res.Streamer.reset(TheTarget->createMCObjectStreamer( Triple(TripleName), *Res.Ctx, std::unique_ptr<MCAsmBackend>(MAB), - std::move(OW), std::unique_ptr<MCCodeEmitter>(MCE), *STI, - /* RelaxAll */ false, - /* IncrementalLinkerCompatible */ false, - /* DWARFMustBeAtTheEnd */ false)); + std::move(OW), std::unique_ptr<MCCodeEmitter>(MCE), *STI)); return Res; } diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp index ad5e51b..2cbd4cc 100644 --- a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp @@ -503,8 +503,7 @@ llvm::Error dwarfgen::Generator::init(Triple TheTriple, uint16_t V) { MS = TheTarget->createMCObjectStreamer( TheTriple, *MC, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(*Stream), std::unique_ptr<MCCodeEmitter>(MCE), - *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false); + *MSTI); if (!MS) return make_error<StringError>("no object streamer for target " + TripleName, diff --git a/llvm/unittests/MC/DwarfLineTableHeaders.cpp 
b/llvm/unittests/MC/DwarfLineTableHeaders.cpp index d8a657e..1fad1ba 100644 --- a/llvm/unittests/MC/DwarfLineTableHeaders.cpp +++ b/llvm/unittests/MC/DwarfLineTableHeaders.cpp @@ -83,10 +83,7 @@ public: std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS); Res.Streamer.reset(TheTarget->createMCObjectStreamer( Triple(TripleName), *Res.Ctx, std::unique_ptr<MCAsmBackend>(MAB), - std::move(OW), std::unique_ptr<MCCodeEmitter>(MCE), *STI, - /* RelaxAll */ false, - /* IncrementalLinkerCompatible */ false, - /* DWARFMustBeAtTheEnd */ false)); + std::move(OW), std::unique_ptr<MCCodeEmitter>(MCE), *STI)); return Res; } diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 04beb42..054a81e 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -591,3 +591,36 @@ define void @foo(ptr %arg0, ptr %arg1) { EXPECT_EQ(NewLd->getAlign(), 8); EXPECT_EQ(NewLd->getName(), "NewLd"); } + +TEST_F(SandboxIRTest, StoreInst) { + parseIR(C, R"IR( +define void @foo(i8 %val, ptr %ptr) { + store i8 %val, ptr %ptr, align 64 + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *Val = F->getArg(0); + auto *Ptr = F->getArg(1); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *St = cast<sandboxir::StoreInst>(&*It++); + auto *Ret = &*It++; + + // Check that the StoreInst has been created correctly. 
+ // Check getPointerOperand() + EXPECT_EQ(St->getValueOperand(), Val); + EXPECT_EQ(St->getPointerOperand(), Ptr); + // Check getAlign() + EXPECT_EQ(St->getAlign(), 64); + // Check create(InsertBefore) + sandboxir::StoreInst *NewSt = + sandboxir::StoreInst::create(Val, Ptr, Align(8), + /*InsertBefore=*/Ret, Ctx); + EXPECT_EQ(NewSt->getType(), St->getType()); + EXPECT_EQ(NewSt->getValueOperand(), Val); + EXPECT_EQ(NewSt->getPointerOperand(), Ptr); + EXPECT_EQ(NewSt->getAlign(), 8); +} diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py index b9122d0..ed78256 100644 --- a/llvm/utils/lit/lit/cl_arguments.py +++ b/llvm/utils/lit/lit/cl_arguments.py @@ -155,11 +155,6 @@ def parse_args(): default=[], ) execution_group.add_argument( - "--time-tests", - help="Track elapsed wall time for each test", - action="store_true", - ) - execution_group.add_argument( "--no-execute", dest="noExecute", help="Don't execute any tests (assume PASS)", @@ -209,6 +204,17 @@ def parse_args(): action="store_true", help="Exit with status zero even if some tests fail", ) + execution_test_time_group = execution_group.add_mutually_exclusive_group() + execution_test_time_group.add_argument( + "--skip-test-time-recording", + help="Do not track elapsed wall time for each test", + action="store_true", + ) + execution_test_time_group.add_argument( + "--time-tests", + help="Track elapsed wall time for each test printed in a histogram", + action="store_true", + ) selection_group = parser.add_argument_group("Test Selection") selection_group.add_argument( diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index db9f24f..24ba804 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -124,7 +124,8 @@ def main(builtin_params={}): run_tests(selected_tests, lit_config, opts, len(discovered_tests)) elapsed = time.time() - start - record_test_times(selected_tests, lit_config) + if not opts.skip_test_time_recording: + 
record_test_times(selected_tests, lit_config) selected_tests, discovered_tests = GoogleTest.post_process_shard_results( selected_tests, discovered_tests diff --git a/llvm/utils/lit/tests/Inputs/time-tests/a.txt b/llvm/utils/lit/tests/Inputs/time-tests/a.txt new file mode 100644 index 0000000..b80b60b --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/time-tests/a.txt @@ -0,0 +1 @@ +# RUN: true diff --git a/llvm/utils/lit/tests/Inputs/time-tests/lit.cfg b/llvm/utils/lit/tests/Inputs/time-tests/lit.cfg new file mode 100644 index 0000000..e6ae418 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/time-tests/lit.cfg @@ -0,0 +1,7 @@ +import lit.formats + +config.name = "time-tests" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest() +config.test_source_root = None +config.test_exec_root = None diff --git a/llvm/utils/lit/tests/time-tests.py b/llvm/utils/lit/tests/time-tests.py new file mode 100644 index 0000000..20b83a6 --- /dev/null +++ b/llvm/utils/lit/tests/time-tests.py @@ -0,0 +1,15 @@ +## Check that --skip-test-time-recording skips .lit_test_times.txt recording. + +# RUN: %{lit-no-order-opt} --skip-test-time-recording %{inputs}/time-tests +# RUN: not ls %{inputs}/time-tests/.lit_test_times.txt + +## Check that --time-tests generates a printed histogram. 
+ +# RUN: %{lit-no-order-opt} --time-tests %{inputs}/time-tests > %t.out +# RUN: FileCheck < %t.out %s +# RUN: rm %{inputs}/time-tests/.lit_test_times.txt + +# CHECK: Tests Times: +# CHECK-NEXT: -------------------------------------------------------------------------- +# CHECK-NEXT: [ Range ] :: [ Percentage ] :: [Count] +# CHECK-NEXT: -------------------------------------------------------------------------- diff --git a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp index 449d754..0e96aa9 100644 --- a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp +++ b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/raw_ostream.h" #include <optional> @@ -145,7 +146,9 @@ public: //===--------------------------------------------------------------------===// /// Backpatch a byte in the result buffer at the given offset. - void patchByte(uint64_t offset, uint8_t value) { + void patchByte(uint64_t offset, uint8_t value, StringLiteral desc) { + LLVM_DEBUG(llvm::dbgs() << "patchByte(" << offset << ',' << uint64_t(value) + << ")\t" << desc << '\n'); assert(offset < size() && offset >= prevResultSize && "cannot patch previously emitted data"); currentResult[offset - prevResultSize] = value; @@ -153,7 +156,9 @@ public: /// Emit the provided blob of data, which is owned by the caller and is /// guaranteed to not die before the end of the bytecode process. - void emitOwnedBlob(ArrayRef<uint8_t> data) { + void emitOwnedBlob(ArrayRef<uint8_t> data, StringLiteral desc) { + LLVM_DEBUG(llvm::dbgs() + << "emitOwnedBlob(" << data.size() << "b)\t" << desc << '\n'); // Push the current buffer before adding the provided data. 
appendResult(std::move(currentResult)); appendOwnedResult(data); @@ -163,17 +168,19 @@ public: /// owned by the caller and is guaranteed to not die before the end of the /// bytecode process. The alignment value is also encoded, making it available /// on load. - void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment) { - emitVarInt(alignment); - emitVarInt(data.size()); + void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment, + StringLiteral desc) { + emitVarInt(alignment, desc); + emitVarInt(data.size(), desc); alignTo(alignment); - emitOwnedBlob(data); + emitOwnedBlob(data, desc); } - void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment) { + void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment, + StringLiteral desc) { ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()), data.size()); - emitOwnedBlobAndAlignment(castedData, alignment); + emitOwnedBlobAndAlignment(castedData, alignment, desc); } /// Align the emitter to the given alignment. @@ -187,7 +194,7 @@ public: size_t curOffset = size(); size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset; while (paddingSize--) - emitByte(bytecode::kAlignmentByte); + emitByte(bytecode::kAlignmentByte, "alignment byte"); // Keep track of the maximum required alignment. requiredAlignment = std::max(requiredAlignment, alignment); @@ -198,12 +205,16 @@ public: /// Emit a single byte. template <typename T> - void emitByte(T byte) { + void emitByte(T byte, StringLiteral desc) { + LLVM_DEBUG(llvm::dbgs() + << "emitByte(" << uint64_t(byte) << ")\t" << desc << '\n'); currentResult.push_back(static_cast<uint8_t>(byte)); } /// Emit a range of bytes. 
- void emitBytes(ArrayRef<uint8_t> bytes) { + void emitBytes(ArrayRef<uint8_t> bytes, StringLiteral desc) { + LLVM_DEBUG(llvm::dbgs() + << "emitBytes(" << bytes.size() << "b)\t" << desc << '\n'); llvm::append_range(currentResult, bytes); } @@ -214,40 +225,43 @@ public: /// All remaining bits in the first byte, along with all of the bits in /// additional bytes, provide the value of the integer encoded in /// little-endian order. - void emitVarInt(uint64_t value) { + void emitVarInt(uint64_t value, StringLiteral desc) { + LLVM_DEBUG(llvm::dbgs() << "emitVarInt(" << value << ")\t" << desc << '\n'); + // In the most common case, the value can be represented in a single byte. // Given how hot this case is, explicitly handle that here. if ((value >> 7) == 0) - return emitByte((value << 1) | 0x1); - emitMultiByteVarInt(value); + return emitByte((value << 1) | 0x1, desc); + emitMultiByteVarInt(value, desc); } /// Emit a signed variable length integer. Signed varints are encoded using /// a varint with zigzag encoding, meaning that we use the low bit of the /// value to indicate the sign of the value. This allows for more efficient /// encoding of negative values by limiting the number of active bits - void emitSignedVarInt(uint64_t value) { - emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63)); + void emitSignedVarInt(uint64_t value, StringLiteral desc) { + emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63), desc); } /// Emit a variable length integer whose low bit is used to encode the /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0). - void emitVarIntWithFlag(uint64_t value, bool flag) { - emitVarInt((value << 1) | (flag ? 1 : 0)); + void emitVarIntWithFlag(uint64_t value, bool flag, StringLiteral desc) { + emitVarInt((value << 1) | (flag ? 1 : 0), desc); } //===--------------------------------------------------------------------===// // String Emission /// Emit the given string as a nul terminated string. 
- void emitNulTerminatedString(StringRef str) { - emitString(str); - emitByte(0); + void emitNulTerminatedString(StringRef str, StringLiteral desc) { + emitString(str, desc); + emitByte(0, "null terminator"); } /// Emit the given string without a nul terminator. - void emitString(StringRef str) { - emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()}); + void emitString(StringRef str, StringLiteral desc) { + emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()}, + desc); } //===--------------------------------------------------------------------===// @@ -260,14 +274,14 @@ public: // indicate whether the section alignment is present, so save an offset to // it. uint64_t codeOffset = currentResult.size(); - emitByte(code); - emitVarInt(emitter.size()); + emitByte(code, "section code"); + emitVarInt(emitter.size(), "section size"); // Integrate the alignment of the section into this emitter if necessary. unsigned emitterAlign = emitter.requiredAlignment; if (emitterAlign > 1) { if (size() & (emitterAlign - 1)) { - emitVarInt(emitterAlign); + emitVarInt(emitterAlign, "section alignment"); alignTo(emitterAlign); // Indicate that we needed to align the section, the high bit of the @@ -295,7 +309,8 @@ private: /// fallback when the number of bytes needed to encode the value is greater /// than 1. We mark it noinline here so that the single byte hot path isn't /// pessimized. - LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value); + LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value, + StringLiteral desc); /// Append a new result buffer to the current contents. void appendResult(std::vector<uint8_t> &&result) { @@ -345,15 +360,15 @@ public: /// Write the current set of strings to the given emitter. 
void write(EncodingEmitter &emitter) { - emitter.emitVarInt(strings.size()); + emitter.emitVarInt(strings.size(), "string section size"); // Emit the sizes in reverse order, so that we don't need to backpatch an // offset to the string data or have a separate section. for (const auto &it : llvm::reverse(strings)) - emitter.emitVarInt(it.first.size() + 1); + emitter.emitVarInt(it.first.size() + 1, "string size"); // Emit the string data itself. for (const auto &it : strings) - emitter.emitNulTerminatedString(it.first.val()); + emitter.emitNulTerminatedString(it.first.val(), "string"); } private: @@ -380,32 +395,35 @@ public: //===--------------------------------------------------------------------===// void writeAttribute(Attribute attr) override { - emitter.emitVarInt(numberingState.getNumber(attr)); + emitter.emitVarInt(numberingState.getNumber(attr), "dialect attr"); } void writeOptionalAttribute(Attribute attr) override { if (!attr) { - emitter.emitVarInt(0); + emitter.emitVarInt(0, "dialect optional attr none"); return; } - emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true); + emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true, + "dialect optional attr"); } void writeType(Type type) override { - emitter.emitVarInt(numberingState.getNumber(type)); + emitter.emitVarInt(numberingState.getNumber(type), "dialect type"); } void writeResourceHandle(const AsmDialectResourceHandle &resource) override { - emitter.emitVarInt(numberingState.getNumber(resource)); + emitter.emitVarInt(numberingState.getNumber(resource), "dialect resource"); } //===--------------------------------------------------------------------===// // Primitives //===--------------------------------------------------------------------===// - void writeVarInt(uint64_t value) override { emitter.emitVarInt(value); } + void writeVarInt(uint64_t value) override { + emitter.emitVarInt(value, "dialect writer"); + } void writeSignedVarInt(int64_t value) override { - 
emitter.emitSignedVarInt(value); + emitter.emitSignedVarInt(value, "dialect writer"); } void writeAPIntWithKnownWidth(const APInt &value) override { @@ -414,21 +432,21 @@ public: // If the value is a single byte, just emit it directly without going // through a varint. if (bitWidth <= 8) - return emitter.emitByte(value.getLimitedValue()); + return emitter.emitByte(value.getLimitedValue(), "dialect APInt"); // If the value fits within a single varint, emit it directly. if (bitWidth <= 64) - return emitter.emitSignedVarInt(value.getLimitedValue()); + return emitter.emitSignedVarInt(value.getLimitedValue(), "dialect APInt"); // Otherwise, we need to encode a variable number of active words. We use // active words instead of the number of total words under the observation // that smaller values will be more common. unsigned numActiveWords = value.getActiveWords(); - emitter.emitVarInt(numActiveWords); + emitter.emitVarInt(numActiveWords, "dialect APInt word count"); const uint64_t *rawValueData = value.getRawData(); for (unsigned i = 0; i < numActiveWords; ++i) - emitter.emitSignedVarInt(rawValueData[i]); + emitter.emitSignedVarInt(rawValueData[i], "dialect APInt word"); } void writeAPFloatWithKnownSemantics(const APFloat &value) override { @@ -436,16 +454,20 @@ public: } void writeOwnedString(StringRef str) override { - emitter.emitVarInt(stringSection.insert(str)); + emitter.emitVarInt(stringSection.insert(str), "dialect string"); } void writeOwnedBlob(ArrayRef<char> blob) override { - emitter.emitVarInt(blob.size()); - emitter.emitOwnedBlob(ArrayRef<uint8_t>( - reinterpret_cast<const uint8_t *>(blob.data()), blob.size())); + emitter.emitVarInt(blob.size(), "dialect blob"); + emitter.emitOwnedBlob( + ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()), + blob.size()), + "dialect blob"); } - void writeOwnedBool(bool value) override { emitter.emitByte(value); } + void writeOwnedBool(bool value) override { + emitter.emitByte(value, "dialect bool"); + } 
int64_t getBytecodeVersion() const override { return bytecodeVersion; } @@ -486,7 +508,7 @@ public: if (!prop) return std::nullopt; EncodingEmitter sizeEmitter; - sizeEmitter.emitVarInt(numberingState.getNumber(prop)); + sizeEmitter.emitVarInt(numberingState.getNumber(prop), "properties size"); scratch.clear(); llvm::raw_svector_ostream os(scratch); sizeEmitter.writeTo(os); @@ -507,16 +529,17 @@ public: /// Write the current set of properties to the given emitter. void write(EncodingEmitter &emitter) { - emitter.emitVarInt(propertiesStorage.size()); + emitter.emitVarInt(propertiesStorage.size(), "properties size"); if (propertiesStorage.empty()) return; for (const auto &storage : propertiesStorage) { if (storage.empty()) { - emitter.emitBytes(ArrayRef<uint8_t>()); + emitter.emitBytes(ArrayRef<uint8_t>(), "empty properties"); continue; } emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]), - storage.size())); + storage.size()), + "property"); } } @@ -532,7 +555,7 @@ private: SmallVector<char> sizeScratch; { EncodingEmitter sizeEmitter; - sizeEmitter.emitVarInt(rawProperties.size()); + sizeEmitter.emitVarInt(rawProperties.size(), "properties"); llvm::raw_svector_ostream os(sizeScratch); sizeEmitter.writeTo(os); } @@ -576,7 +599,8 @@ public: private: void write_impl(const char *ptr, size_t size) override { - emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size}); + emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size}, + "raw emitter"); } uint64_t current_pos() const override { return emitter.size(); } @@ -591,7 +615,7 @@ void EncodingEmitter::writeTo(raw_ostream &os) const { os.write((const char *)currentResult.data(), currentResult.size()); } -void EncodingEmitter::emitMultiByteVarInt(uint64_t value) { +void EncodingEmitter::emitMultiByteVarInt(uint64_t value, StringLiteral desc) { // Compute the number of bytes needed to encode the value. Each byte can hold // up to 7-bits of data. 
We only check up to the number of bits we can encode // in the first byte (8). @@ -601,16 +625,16 @@ void EncodingEmitter::emitMultiByteVarInt(uint64_t value) { uint64_t encodedValue = (value << 1) | 0x1; encodedValue <<= (numBytes - 1); llvm::support::ulittle64_t encodedValueLE(encodedValue); - emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}); + emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}, desc); return; } } // If the value is too large to encode in a single byte, emit a special all // zero marker byte and splat the value directly. - emitByte(0); + emitByte(0, desc); llvm::support::ulittle64_t valueLE(value); - emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}); + emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}, desc); } //===----------------------------------------------------------------------===// @@ -696,7 +720,7 @@ LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) { // Emit the bytecode file header. This is how we identify the output as a // bytecode file. - emitter.emitString("ML\xefR"); + emitter.emitString("ML\xefR", "bytecode header"); // Emit the bytecode version. if (config.bytecodeVersion < bytecode::kMinSupportedVersion || @@ -706,10 +730,10 @@ LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) { << ", must be in range [" << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", " << static_cast<int64_t>(bytecode::kVersion) << ']'; - emitter.emitVarInt(config.bytecodeVersion); + emitter.emitVarInt(config.bytecodeVersion, "bytecode version"); // Emit the producer. - emitter.emitNulTerminatedString(config.producer); + emitter.emitNulTerminatedString(config.producer, "bytecode producer"); // Emit the dialect section. writeDialectSection(emitter); @@ -760,8 +784,8 @@ static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, }); // Emit the dialect and number of elements. 
- emitter.emitVarInt(currentDialect->number); - emitter.emitVarInt(std::distance(groupStart, it)); + emitter.emitVarInt(currentDialect->number, "dialect number"); + emitter.emitVarInt(std::distance(groupStart, it), "dialect offset"); // Emit the entries within the group. for (auto &entry : llvm::make_range(groupStart, it)) @@ -774,13 +798,13 @@ void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) { // Emit the referenced dialects. auto dialects = numberingState.getDialects(); - dialectEmitter.emitVarInt(llvm::size(dialects)); + dialectEmitter.emitVarInt(llvm::size(dialects), "dialects count"); for (DialectNumbering &dialect : dialects) { // Write the string section and get the ID. size_t nameID = stringSection.insert(dialect.name); if (config.bytecodeVersion < bytecode::kDialectVersioning) { - dialectEmitter.emitVarInt(nameID); + dialectEmitter.emitVarInt(nameID, "dialect name ID"); continue; } @@ -798,22 +822,25 @@ void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) { // this in the dialect ID, so if there is no version, we don't write the // section. size_t versionAvailable = versionEmitter.size() > 0; - dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable); + dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable, + "dialect version"); if (versionAvailable) dialectEmitter.emitSection(bytecode::Section::kDialectVersions, std::move(versionEmitter)); } if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) - dialectEmitter.emitVarInt(size(numberingState.getOpNames())); + dialectEmitter.emitVarInt(size(numberingState.getOpNames()), + "op names count"); // Emit the referenced operation names grouped by dialect. 
auto emitOpName = [&](OpNameNumbering &name) { size_t stringId = stringSection.insert(name.name.stripDialect()); if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding) - dialectEmitter.emitVarInt(stringId); + dialectEmitter.emitVarInt(stringId, "dialect op name"); else - dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered()); + dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered(), + "dialect op name"); }; writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName); @@ -826,8 +853,10 @@ void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) { void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) { EncodingEmitter attrTypeEmitter; EncodingEmitter offsetEmitter; - offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes())); - offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes())); + offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()), + "attributes count"); + offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()), + "types count"); // A functor used to emit an attribute or type entry. uint64_t prevOffset = 0; @@ -836,7 +865,7 @@ void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) { auto emitAttrOrTypeRawImpl = [&]() -> void { RawEmitterOstream(attrTypeEmitter) << entryValue; - attrTypeEmitter.emitByte(0); + attrTypeEmitter.emitByte(0, "attr/type separator"); }; auto emitAttrOrTypeImpl = [&]() -> bool { // TODO: We don't currently support custom encoded mutable types and @@ -882,7 +911,8 @@ void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) { // Record the offset of this entry. 
uint64_t curOffset = attrTypeEmitter.size(); - offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding); + offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding, + "attr/type offset"); prevOffset = curOffset; }; @@ -910,30 +940,33 @@ LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter, // use the low bit of the operation count to indicate if the block has // arguments. unsigned numOps = numberingState.getOperationCount(block); - emitter.emitVarIntWithFlag(numOps, hasArgs); + emitter.emitVarIntWithFlag(numOps, hasArgs, "block num ops"); // Emit the arguments of the block. if (hasArgs) { - emitter.emitVarInt(args.size()); + emitter.emitVarInt(args.size(), "block args count"); for (BlockArgument arg : args) { Location argLoc = arg.getLoc(); if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) { emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()), - !isa<UnknownLoc>(argLoc)); + !isa<UnknownLoc>(argLoc), "block arg type"); if (!isa<UnknownLoc>(argLoc)) - emitter.emitVarInt(numberingState.getNumber(argLoc)); + emitter.emitVarInt(numberingState.getNumber(argLoc), + "block arg location"); } else { - emitter.emitVarInt(numberingState.getNumber(arg.getType())); - emitter.emitVarInt(numberingState.getNumber(argLoc)); + emitter.emitVarInt(numberingState.getNumber(arg.getType()), + "block arg type"); + emitter.emitVarInt(numberingState.getNumber(argLoc), + "block arg location"); } } if (config.bytecodeVersion >= bytecode::kUseListOrdering) { uint64_t maskOffset = emitter.size(); uint8_t encodingMask = 0; - emitter.emitByte(0); + emitter.emitByte(0, "use-list separator"); writeUseListOrders(emitter, encodingMask, args); if (encodingMask) - emitter.patchByte(maskOffset, encodingMask); + emitter.patchByte(maskOffset, encodingMask, "block patch encoding"); } } @@ -945,17 +978,17 @@ LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter, } LogicalResult 
BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) { - emitter.emitVarInt(numberingState.getNumber(op->getName())); + emitter.emitVarInt(numberingState.getNumber(op->getName()), "op name ID"); // Emit a mask for the operation components. We need to fill this in later // (when we actually know what needs to be emitted), so emit a placeholder for // now. uint64_t maskOffset = emitter.size(); uint8_t opEncodingMask = 0; - emitter.emitByte(0); + emitter.emitByte(0, "op separator"); // Emit the location for this operation. - emitter.emitVarInt(numberingState.getNumber(op->getLoc())); + emitter.emitVarInt(numberingState.getNumber(op->getLoc()), "op location"); // Emit the attributes of this operation. DictionaryAttr attrs = op->getDiscardableAttrDictionary(); @@ -969,7 +1002,7 @@ LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) { } if (!attrs.empty()) { opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs; - emitter.emitVarInt(numberingState.getNumber(attrs)); + emitter.emitVarInt(numberingState.getNumber(attrs), "op attrs count"); } // Emit the properties of this operation, for now we still support deployment @@ -978,32 +1011,32 @@ LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) { std::optional<ssize_t> propertiesId = propertiesSection.emit(op); if (propertiesId.has_value()) { opEncodingMask |= bytecode::OpEncodingMask::kHasProperties; - emitter.emitVarInt(*propertiesId); + emitter.emitVarInt(*propertiesId, "op properties ID"); } } // Emit the result types of the operation. if (unsigned numResults = op->getNumResults()) { opEncodingMask |= bytecode::OpEncodingMask::kHasResults; - emitter.emitVarInt(numResults); + emitter.emitVarInt(numResults, "op results count"); for (Type type : op->getResultTypes()) - emitter.emitVarInt(numberingState.getNumber(type)); + emitter.emitVarInt(numberingState.getNumber(type), "op result type"); } // Emit the operands of the operation. 
if (unsigned numOperands = op->getNumOperands()) { opEncodingMask |= bytecode::OpEncodingMask::kHasOperands; - emitter.emitVarInt(numOperands); + emitter.emitVarInt(numOperands, "op operands count"); for (Value operand : op->getOperands()) - emitter.emitVarInt(numberingState.getNumber(operand)); + emitter.emitVarInt(numberingState.getNumber(operand), "op operand types"); } // Emit the successors of the operation. if (unsigned numSuccessors = op->getNumSuccessors()) { opEncodingMask |= bytecode::OpEncodingMask::kHasSuccessors; - emitter.emitVarInt(numSuccessors); + emitter.emitVarInt(numSuccessors, "op successors count"); for (Block *successor : op->getSuccessors()) - emitter.emitVarInt(numberingState.getNumber(successor)); + emitter.emitVarInt(numberingState.getNumber(successor), "op successor"); } // Emit the use-list orders to bytecode, so we can reconstruct the same order @@ -1017,7 +1050,7 @@ LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) { opEncodingMask |= bytecode::OpEncodingMask::kHasInlineRegions; // Update the mask for the operation. - emitter.patchByte(maskOffset, opEncodingMask); + emitter.patchByte(maskOffset, opEncodingMask, "op encoding mask"); // With the mask emitted, we can now emit the regions of the operation. We do // this after mask emission to avoid offset complications that may arise by @@ -1025,7 +1058,8 @@ LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) { // op encoding mask is more annoying). if (numRegions) { bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op); - emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove); + emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove, + "op regions count"); // If the region is not isolated from above, or we are emitting bytecode // targeting version <kLazyLoading, we don't use a section. 
@@ -1096,8 +1130,9 @@ void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter, opEncodingMask |= bytecode::OpEncodingMask::kHasUseListOrders; // Emit the number of results that have a custom use-list order if the number // of results is greater than one. - if (range.size() != 1) - emitter.emitVarInt(map.size()); + if (range.size() != 1) { + emitter.emitVarInt(map.size(), "custom use-list size"); + } for (const auto &item : map) { auto resultIdx = item.getFirst(); @@ -1113,20 +1148,22 @@ void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter, // For single result, we don't need to store the result index. if (range.size() != 1) - emitter.emitVarInt(resultIdx); + emitter.emitVarInt(resultIdx, "use-list result index"); if (indexPairEncoding) { - emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding); + emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding, + "use-list index pair size"); for (auto pair : llvm::enumerate(useListOrder)) { if (pair.index() != pair.value()) { - emitter.emitVarInt(pair.value()); - emitter.emitVarInt(pair.index()); + emitter.emitVarInt(pair.value(), "use-list index pair first"); + emitter.emitVarInt(pair.index(), "use-list index pair second"); } } } else { - emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding); + emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding, + "use-list size"); for (const auto &index : useListOrder) - emitter.emitVarInt(index); + emitter.emitVarInt(index, "use-list order"); } } } @@ -1136,15 +1173,15 @@ LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter, // If the region is empty, we only need to emit the number of blocks (which is // zero). if (region->empty()) { - emitter.emitVarInt(/*numBlocks*/ 0); + emitter.emitVarInt(/*numBlocks*/ 0, "region block count empty"); return success(); } // Emit the number of blocks and values within the region. 
unsigned numBlocks, numValues; std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region); - emitter.emitVarInt(numBlocks); - emitter.emitVarInt(numValues); + emitter.emitVarInt(numBlocks, "region block count"); + emitter.emitVarInt(numValues, "region value count"); // Emit the blocks within the region. for (Block &block : *region) @@ -1160,7 +1197,7 @@ LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter, // Write the IR section the same way as a block with no arguments. Note that // the low-bit of the operation count for a block is used to indicate if the // block has arguments, which in this case is always false. - irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false); + irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false, "ir section"); // Emit the operations. if (failed(writeOp(irEmitter, op))) @@ -1189,17 +1226,17 @@ public: void buildBlob(StringRef key, ArrayRef<char> data, uint32_t dataAlignment) final { if (!shouldElideData) - emitter.emitOwnedBlobAndAlignment(data, dataAlignment); + emitter.emitOwnedBlobAndAlignment(data, dataAlignment, "resource blob"); postProcessFn(key, AsmResourceEntryKind::Blob); } void buildBool(StringRef key, bool data) final { if (!shouldElideData) - emitter.emitByte(data); + emitter.emitByte(data, "resource bool"); postProcessFn(key, AsmResourceEntryKind::Bool); } void buildString(StringRef key, StringRef data) final { if (!shouldElideData) - emitter.emitVarInt(stringSection.insert(data)); + emitter.emitVarInt(stringSection.insert(data), "resource string"); postProcessFn(key, AsmResourceEntryKind::String); } @@ -1229,12 +1266,14 @@ void BytecodeWriter::writeResourceSection(Operation *op, // Functor used to emit a resource group defined by 'key'. 
auto emitResourceGroup = [&](uint64_t key) { - resourceOffsetEmitter.emitVarInt(key); - resourceOffsetEmitter.emitVarInt(curResourceEntries.size()); + resourceOffsetEmitter.emitVarInt(key, "resource group key"); + resourceOffsetEmitter.emitVarInt(curResourceEntries.size(), + "resource group size"); for (auto [key, kind, size] : curResourceEntries) { - resourceOffsetEmitter.emitVarInt(stringSection.insert(key)); - resourceOffsetEmitter.emitVarInt(size); - resourceOffsetEmitter.emitByte(kind); + resourceOffsetEmitter.emitVarInt(stringSection.insert(key), + "resource key"); + resourceOffsetEmitter.emitVarInt(size, "resource size"); + resourceOffsetEmitter.emitByte(kind, "resource kind"); } }; @@ -1244,7 +1283,8 @@ void BytecodeWriter::writeResourceSection(Operation *op, config.shouldElideResourceData); // Emit the external resource entries. - resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size()); + resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size(), + "external resource printer count"); for (const auto &printer : config.externalResourcePrinters) { curResourceEntries.clear(); printer->buildResources(op, entryBuilder); diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index 70d6bcd..4d23f98 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -324,8 +324,7 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) { mcStreamer.reset(target->createMCObjectStreamer( triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), - *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false)); + *sti)); std::unique_ptr<llvm::MCAsmParser> parser( createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 1e0afee..0b552a7 100644 --- 
a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -432,34 +432,14 @@ private: Block *insertBeforeBlock; }; -/// This structure contains the information pertaining to an argument that has -/// been converted. -struct ConvertedArgInfo { - ConvertedArgInfo(unsigned newArgIdx, unsigned newArgSize, - Value castValue = nullptr) - : newArgIdx(newArgIdx), newArgSize(newArgSize), castValue(castValue) {} - - /// The start index of in the new argument list that contains arguments that - /// replace the original. - unsigned newArgIdx; - - /// The number of arguments that replaced the original argument. - unsigned newArgSize; - - /// The cast value that was created to cast from the new arguments to the - /// old. This only used if 'newArgSize' > 1. - Value castValue; -}; - /// Block type conversion. This rewrite is partially reflected in the IR. class BlockTypeConversionRewrite : public BlockRewrite { public: - BlockTypeConversionRewrite( - ConversionPatternRewriterImpl &rewriterImpl, Block *block, - Block *origBlock, SmallVector<std::optional<ConvertedArgInfo>, 1> argInfo, - const TypeConverter *converter) + BlockTypeConversionRewrite(ConversionPatternRewriterImpl &rewriterImpl, + Block *block, Block *origBlock, + const TypeConverter *converter) : BlockRewrite(Kind::BlockTypeConversion, rewriterImpl, block), - origBlock(origBlock), argInfo(argInfo), converter(converter) {} + origBlock(origBlock), converter(converter) {} static bool classof(const IRRewrite *rewrite) { return rewrite->getKind() == Kind::BlockTypeConversion; @@ -479,10 +459,6 @@ private: /// The original block that was requested to have its signature converted. Block *origBlock; - /// The conversion information for each of the arguments. The information is - /// std::nullopt if the argument was dropped during conversion. - SmallVector<std::optional<ConvertedArgInfo>, 1> argInfo; - /// The type converter used to convert the arguments. 
const TypeConverter *converter; }; @@ -691,12 +667,16 @@ public: /// The type of materialization. enum MaterializationKind { /// This materialization materializes a conversion for an illegal block - /// argument type, to a legal one. + /// argument type, to the original one. Argument, /// This materialization materializes a conversion from an illegal type to a /// legal one. - Target + Target, + + /// This materialization materializes a conversion from a legal type back to + /// an illegal one. + Source }; /// An unresolved materialization, i.e., a "builtin.unrealized_conversion_cast" @@ -736,7 +716,7 @@ public: private: /// The corresponding type converter to use when resolving this /// materialization, and the kind of this materialization. - llvm::PointerIntPair<const TypeConverter *, 1, MaterializationKind> + llvm::PointerIntPair<const TypeConverter *, 2, MaterializationKind> converterAndKind; }; } // namespace @@ -855,11 +835,6 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { ValueRange inputs, Type outputType, const TypeConverter *converter); - Value buildUnresolvedArgumentMaterialization(Block *block, Location loc, - ValueRange inputs, - Type outputType, - const TypeConverter *converter); - Value buildUnresolvedTargetMaterialization(Location loc, Value input, Type outputType, const TypeConverter *converter); @@ -989,28 +964,6 @@ void BlockTypeConversionRewrite::commit(RewriterBase &rewriter) { dyn_cast_or_null<RewriterBase::Listener>(rewriter.getListener())) for (Operation *op : block->getUsers()) listener->notifyOperationModified(op); - - // Process the remapping for each of the original arguments. - for (auto [origArg, info] : - llvm::zip_equal(origBlock->getArguments(), argInfo)) { - // Handle the case of a 1->0 value mapping. 
- if (!info) { - if (Value newArg = - rewriterImpl.mapping.lookupOrNull(origArg, origArg.getType())) - rewriter.replaceAllUsesWith(origArg, newArg); - continue; - } - - // Otherwise this is a 1->1+ value mapping. - Value castValue = info->castValue; - assert(info->newArgSize >= 1 && castValue && "expected 1->1+ mapping"); - - // If the argument is still used, replace it with the generated cast. - if (!origArg.use_empty()) { - rewriter.replaceAllUsesWith(origArg, rewriterImpl.mapping.lookupOrDefault( - castValue, origArg.getType())); - } - } } void BlockTypeConversionRewrite::rollback() { @@ -1035,14 +988,12 @@ LogicalResult BlockTypeConversionRewrite::materializeLiveConversions( continue; Value replacementValue = rewriterImpl.mapping.lookupOrDefault(origArg); - bool isDroppedArg = replacementValue == origArg; - if (!isDroppedArg) - builder.setInsertionPointAfterValue(replacementValue); + assert(replacementValue && "replacement value not found"); Value newArg; if (converter) { + builder.setInsertionPointAfterValue(replacementValue); newArg = converter->materializeSourceConversion( - builder, origArg.getLoc(), origArg.getType(), - isDroppedArg ? ValueRange() : ValueRange(replacementValue)); + builder, origArg.getLoc(), origArg.getType(), replacementValue); assert((!newArg || newArg.getType() == origArg.getType()) && "materialization hook did not provide a value of the expected " "type"); @@ -1053,8 +1004,6 @@ LogicalResult BlockTypeConversionRewrite::materializeLiveConversions( << "failed to materialize conversion for block argument #" << it.index() << " that remained live after conversion, type was " << origArg.getType(); - if (!isDroppedArg) - diag << ", with target type " << replacementValue.getType(); diag.attachNote(liveUser->getLoc()) << "see existing live user here: " << *liveUser; return failure(); @@ -1340,73 +1289,64 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( // Replace all uses of the old block with the new block. 
block->replaceAllUsesWith(newBlock); - // Remap each of the original arguments as determined by the signature - // conversion. - SmallVector<std::optional<ConvertedArgInfo>, 1> argInfo; - argInfo.resize(origArgCount); - for (unsigned i = 0; i != origArgCount; ++i) { - auto inputMap = signatureConversion.getInputMapping(i); - if (!inputMap) - continue; BlockArgument origArg = block->getArgument(i); + Type origArgType = origArg.getType(); + + std::optional<TypeConverter::SignatureConversion::InputMapping> inputMap = + signatureConversion.getInputMapping(i); + if (!inputMap) { + // This block argument was dropped and no replacement value was provided. + // Materialize a replacement value "out of thin air". + Value repl = buildUnresolvedMaterialization( + MaterializationKind::Source, newBlock, newBlock->begin(), + origArg.getLoc(), /*inputs=*/ValueRange(), + /*outputType=*/origArgType, converter); + mapping.map(origArg, repl); + appendRewrite<ReplaceBlockArgRewrite>(block, origArg); + continue; + } - // If inputMap->replacementValue is not nullptr, then the argument is - // dropped and a replacement value is provided to be the remappedValue. - if (inputMap->replacementValue) { + if (Value repl = inputMap->replacementValue) { + // This block argument was dropped and a replacement value was provided. assert(inputMap->size == 0 && "invalid to provide a replacement value when the argument isn't " "dropped"); - mapping.map(origArg, inputMap->replacementValue); + mapping.map(origArg, repl); appendRewrite<ReplaceBlockArgRewrite>(block, origArg); continue; } - // Otherwise, this is a 1->1+ mapping. + // This is a 1->1+ mapping. 1->N mappings are not fully supported in the + // dialect conversion. Therefore, we need an argument materialization to + // turn the replacement block arguments into a single SSA value that can be + // used as a replacement. 
auto replArgs = newBlock->getArguments().slice(inputMap->inputNo, inputMap->size); - Value newArg; + Value argMat = buildUnresolvedMaterialization( + MaterializationKind::Argument, newBlock, newBlock->begin(), + origArg.getLoc(), /*inputs=*/replArgs, origArgType, converter); + mapping.map(origArg, argMat); + appendRewrite<ReplaceBlockArgRewrite>(block, origArg); - // If this is a 1->1 mapping and the types of new and replacement arguments - // match (i.e. it's an identity map), then the argument is mapped to its - // original type. // FIXME: We simply pass through the replacement argument if there wasn't a // converter, which isn't great as it allows implicit type conversions to // appear. We should properly restructure this code to handle cases where a // converter isn't provided and also to properly handle the case where an // argument materialization is actually a temporary source materialization // (e.g. in the case of 1->N). - if (replArgs.size() == 1 && - (!converter || replArgs[0].getType() == origArg.getType())) { - newArg = replArgs.front(); - mapping.map(origArg, newArg); - } else { - // Build argument materialization: new block arguments -> old block - // argument type. - Value argMat = buildUnresolvedArgumentMaterialization( - newBlock, origArg.getLoc(), replArgs, origArg.getType(), converter); - mapping.map(origArg, argMat); - - // Build target materialization: old block argument type -> legal type. - // Note: This function returns an "empty" type if no valid conversion to - // a legal type exists. In that case, we continue the conversion with the - // original block argument type. 
- Type legalOutputType = converter->convertType(origArg.getType()); - if (legalOutputType && legalOutputType != origArg.getType()) { - newArg = buildUnresolvedTargetMaterialization( - origArg.getLoc(), argMat, legalOutputType, converter); - mapping.map(argMat, newArg); - } else { - newArg = argMat; - } + Type legalOutputType; + if (converter) + legalOutputType = converter->convertType(origArgType); + if (legalOutputType && legalOutputType != origArgType) { + Value targetMat = buildUnresolvedTargetMaterialization( + origArg.getLoc(), argMat, legalOutputType, converter); + mapping.map(argMat, targetMat); } - appendRewrite<ReplaceBlockArgRewrite>(block, origArg); - argInfo[i] = ConvertedArgInfo(inputMap->inputNo, inputMap->size, newArg); } - appendRewrite<BlockTypeConversionRewrite>(newBlock, block, argInfo, - converter); + appendRewrite<BlockTypeConversionRewrite>(newBlock, block, converter); // Erase the old block. (It is just unlinked for now and will be erased during // cleanup.) @@ -1437,13 +1377,6 @@ Value ConversionPatternRewriterImpl::buildUnresolvedMaterialization( appendRewrite<UnresolvedMaterializationRewrite>(convertOp, converter, kind); return convertOp.getResult(0); } -Value ConversionPatternRewriterImpl::buildUnresolvedArgumentMaterialization( - Block *block, Location loc, ValueRange inputs, Type outputType, - const TypeConverter *converter) { - return buildUnresolvedMaterialization(MaterializationKind::Argument, block, - block->begin(), loc, inputs, outputType, - converter); -} Value ConversionPatternRewriterImpl::buildUnresolvedTargetMaterialization( Location loc, Value input, Type outputType, const TypeConverter *converter) { @@ -2862,6 +2795,10 @@ static LogicalResult legalizeUnresolvedMaterialization( newMaterialization = converter->materializeTargetConversion( rewriter, op->getLoc(), outputType, inputOperands); break; + case MaterializationKind::Source: + newMaterialization = converter->materializeSourceConversion( + rewriter, op->getLoc(), 
outputType, inputOperands); + break; } if (newMaterialization) { assert(newMaterialization.getType() == outputType && @@ -2874,8 +2811,8 @@ static LogicalResult legalizeUnresolvedMaterialization( InFlightDiagnostic diag = op->emitError() << "failed to legalize unresolved materialization " - "from " - << inputOperands.getTypes() << " to " << outputType + "from (" + << inputOperands.getTypes() << ") to " << outputType << " that remained live after conversion"; if (Operation *liveUser = findLiveUser(op->getUsers())) { diag.attachNote(liveUser->getLoc()) diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir index b35cda8..8254be6 100644 --- a/mlir/test/Transforms/test-legalize-type-conversion.mlir +++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir @@ -2,9 +2,8 @@ func.func @test_invalid_arg_materialization( - // expected-error@below {{failed to materialize conversion for block argument #0 that remained live after conversion, type was 'i16'}} + // expected-error@below {{failed to legalize unresolved materialization from () to 'i16' that remained live after conversion}} %arg0: i16) { - // expected-note@below {{see existing live user here}} "foo.return"(%arg0) : (i16) -> () } @@ -104,9 +103,8 @@ func.func @test_block_argument_not_converted() { // Make sure argument type changes aren't implicitly forwarded. 
func.func @test_signature_conversion_no_converter() { "test.signature_conversion_no_converter"() ({ - // expected-error@below {{failed to materialize conversion for block argument #0 that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion}} ^bb0(%arg0: f32): - // expected-note@below {{see existing live user here}} "test.type_consumer"(%arg0) : (f32) -> () "test.return"(%arg0) : (f32) -> () }) : () -> () diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index ae17746..64d36c7 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -947,14 +947,7 @@ cc_library( ]) + [ # To avoid a dependency cycle. "include/llvm/Analysis/IVDescriptors.h", - "include/llvm/CodeGen/GenVT.inc", - ] + glob( - # To avoid a dependency cycle. - [ - "include/llvm/CodeGen/**/*.h", - "include/llvm/CodeGenTypes/**/*.h", - ], - ), + ], hdrs = glob( [ "include/llvm/*.h", |