author     Amir Ayupov <aaupov@fb.com>  2024-09-26 12:25:43 -0700
committer  Amir Ayupov <aaupov@fb.com>  2024-09-26 12:25:43 -0700
commit     648f2bbea6af7f8a6313ecf4a7dc15579bea53fb (patch)
tree       d4df07841adc5cbb0bc0c4d00c2a76e4a2a32ac0
parent     41e1fa020cfe334c337757390ac648cae047641e (diff)
parent     70ef5eb6f087524dc952a8f5249b79f4a4000e04 (diff)
[spr] changes introduced through rebase (users/shawbyoung/spr/main.bolt-match-blocks-with-pseudo-probes)
Created using spr 1.3.4 [skip ci]
-rw-r--r--  .github/workflows/release-binaries.yml | 15
-rw-r--r--  bolt/include/bolt/Profile/ProfileYAMLMapping.h | 14
-rw-r--r--  bolt/include/bolt/Profile/YAMLProfileReader.h | 4
-rw-r--r--  bolt/include/bolt/Profile/YAMLProfileWriter.h | 2
-rw-r--r--  bolt/lib/Profile/YAMLProfileReader.cpp | 13
-rw-r--r--  bolt/lib/Profile/YAMLProfileWriter.cpp | 5
-rw-r--r--  bolt/test/X86/pseudoprobe-decoding-inline.test | 4
-rw-r--r--  bolt/test/X86/pseudoprobe-decoding-noinline.test | 41
-rw-r--r--  clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst | 28
-rw-r--r--  clang/include/clang/Basic/Builtins.td | 7
-rw-r--r--  clang/include/clang/Driver/Options.td | 4
-rw-r--r--  clang/lib/Basic/Targets/RISCV.cpp | 2
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp | 10
-rw-r--r--  clang/lib/CodeGen/CGHLSLRuntime.h | 1
-rw-r--r--  clang/lib/Driver/ToolChains/Gnu.cpp | 29
-rw-r--r--  clang/lib/Headers/hlsl/hlsl_intrinsics.h | 33
-rw-r--r--  clang/lib/Sema/SemaHLSL.cpp | 12
-rw-r--r--  clang/lib/Sema/SemaInit.cpp | 2
-rw-r--r--  clang/test/AST/HLSL/vector-constructors.hlsl | 3
-rw-r--r--  clang/test/ClangScanDeps/implicit-target.c | 31
-rw-r--r--  clang/test/CodeGenHLSL/builtins/step.hlsl | 84
-rw-r--r--  clang/test/Driver/cl-link.c | 12
-rw-r--r--  clang/test/Driver/windows-cross.c | 2
-rw-r--r--  clang/test/Preprocessor/riscv-cmodel.c | 9
-rw-r--r--  clang/test/SemaHLSL/BuiltIns/step-errors.hlsl | 31
-rw-r--r--  clang/tools/clang-scan-deps/CMakeLists.txt | 1
-rw-r--r--  clang/tools/clang-scan-deps/ClangScanDeps.cpp | 5
-rw-r--r--  clang/utils/TableGen/ClangASTPropertiesEmitter.cpp | 18
-rw-r--r--  clang/utils/TableGen/TableGenBackends.h | 12
-rw-r--r--  compiler-rt/cmake/base-config-ix.cmake | 2
-rw-r--r--  compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc | 1
-rw-r--r--  compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h | 5
-rw-r--r--  compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp | 28
-rw-r--r--  compiler-rt/lib/sanitizer_common/weak_symbols.txt | 1
-rw-r--r--  compiler-rt/test/lit.common.cfg.py | 3
-rw-r--r--  compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c | 52
-rw-r--r--  compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c | 2
-rw-r--r--  cross-project-tests/lit.cfg.py | 14
-rw-r--r--  cross-project-tests/lit.site.cfg.py.in | 4
-rw-r--r--  flang/include/flang/Semantics/expression.h | 2
-rw-r--r--  flang/lib/Evaluate/intrinsics.cpp | 4
-rw-r--r--  flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 12
-rw-r--r--  flang/lib/Lower/OpenMP/ClauseProcessor.h | 1
-rw-r--r--  flang/lib/Lower/OpenMP/OpenMP.cpp | 4
-rw-r--r--  flang/lib/Semantics/check-call.cpp | 8
-rw-r--r--  flang/lib/Semantics/expression.cpp | 11
-rw-r--r--  flang/runtime/edit-input.cpp | 24
-rw-r--r--  flang/test/Lower/OpenMP/simd.f90 | 18
-rw-r--r--  flang/test/Semantics/elemental02.f90 | 13
-rw-r--r--  flang/test/Semantics/expr-errors06.f90 | 8
-rw-r--r--  flang/test/Semantics/kinds06.f90 | 4
-rw-r--r--  flang/unittests/Runtime/Namelist.cpp | 29
-rw-r--r--  libcxx/.clang-format | 1
-rw-r--r--  libcxx/include/CMakeLists.txt | 2
-rw-r--r--  libcxx/include/__algorithm/adjacent_find.h | 6
-rw-r--r--  libcxx/include/__algorithm/all_of.h | 2
-rw-r--r--  libcxx/include/__algorithm/any_of.h | 2
-rw-r--r--  libcxx/include/__algorithm/binary_search.h | 4
-rw-r--r--  libcxx/include/__algorithm/count.h | 2
-rw-r--r--  libcxx/include/__algorithm/count_if.h | 2
-rw-r--r--  libcxx/include/__algorithm/equal.h | 16
-rw-r--r--  libcxx/include/__algorithm/equal_range.h | 4
-rw-r--r--  libcxx/include/__algorithm/find.h | 2
-rw-r--r--  libcxx/include/__algorithm/find_end.h | 6
-rw-r--r--  libcxx/include/__algorithm/find_first_of.h | 4
-rw-r--r--  libcxx/include/__algorithm/find_if.h | 2
-rw-r--r--  libcxx/include/__algorithm/find_if_not.h | 2
-rw-r--r--  libcxx/include/__algorithm/includes.h | 4
-rw-r--r--  libcxx/include/__algorithm/is_heap.h | 4
-rw-r--r--  libcxx/include/__algorithm/is_heap_until.h | 4
-rw-r--r--  libcxx/include/__algorithm/is_partitioned.h | 2
-rw-r--r--  libcxx/include/__algorithm/is_permutation.h | 10
-rw-r--r--  libcxx/include/__algorithm/is_sorted.h | 4
-rw-r--r--  libcxx/include/__algorithm/is_sorted_until.h | 4
-rw-r--r--  libcxx/include/__algorithm/lexicographical_compare.h | 4
-rw-r--r--  libcxx/include/__algorithm/lower_bound.h | 10
-rw-r--r--  libcxx/include/__algorithm/max.h | 8
-rw-r--r--  libcxx/include/__algorithm/max_element.h | 4
-rw-r--r--  libcxx/include/__algorithm/min.h | 8
-rw-r--r--  libcxx/include/__algorithm/min_element.h | 4
-rw-r--r--  libcxx/include/__algorithm/minmax.h | 8
-rw-r--r--  libcxx/include/__algorithm/minmax_element.h | 4
-rw-r--r--  libcxx/include/__algorithm/mismatch.h | 22
-rw-r--r--  libcxx/include/__algorithm/none_of.h | 2
-rw-r--r--  libcxx/include/__algorithm/pstl.h | 2
-rw-r--r--  libcxx/include/__algorithm/remove.h | 2
-rw-r--r--  libcxx/include/__algorithm/remove_if.h | 2
-rw-r--r--  libcxx/include/__algorithm/search.h | 4
-rw-r--r--  libcxx/include/__algorithm/search_n.h | 4
-rw-r--r--  libcxx/include/__algorithm/set_intersection.h | 6
-rw-r--r--  libcxx/include/__algorithm/simd_utils.h | 8
-rw-r--r--  libcxx/include/__algorithm/unique.h | 6
-rw-r--r--  libcxx/include/__algorithm/upper_bound.h | 4
-rw-r--r--  libcxx/include/__bit/bit_cast.h | 2
-rw-r--r--  libcxx/include/__bit/countl.h | 6
-rw-r--r--  libcxx/include/__bit/countr.h | 8
-rw-r--r--  libcxx/include/__chrono/leap_second.h | 4
-rw-r--r--  libcxx/include/__config | 9
-rw-r--r--  libcxx/include/__filesystem/path.h | 2
-rw-r--r--  libcxx/include/__functional/identity.h | 2
-rw-r--r--  libcxx/include/__fwd/get.h | 24
-rw-r--r--  libcxx/include/__fwd/variant.h | 77
-rw-r--r--  libcxx/include/__math/abs.h | 8
-rw-r--r--  libcxx/include/__math/copysign.h | 6
-rw-r--r--  libcxx/include/__math/min_max.h | 16
-rw-r--r--  libcxx/include/__math/roots.h | 8
-rw-r--r--  libcxx/include/__math/rounding_functions.h | 48
-rw-r--r--  libcxx/include/__math/traits.h | 66
-rw-r--r--  libcxx/include/__memory/allocate_at_least.h | 2
-rw-r--r--  libcxx/include/__memory/allocator.h | 4
-rw-r--r--  libcxx/include/__memory/allocator_traits.h | 6
-rw-r--r--  libcxx/include/__memory/assume_aligned.h | 2
-rw-r--r--  libcxx/include/__memory/temporary_buffer.h | 2
-rw-r--r--  libcxx/include/__mutex/lock_guard.h | 4
-rw-r--r--  libcxx/include/__mutex/unique_lock.h | 16
-rw-r--r--  libcxx/include/__ranges/elements_view.h | 2
-rw-r--r--  libcxx/include/__utility/forward.h | 4
-rw-r--r--  libcxx/include/__utility/move.h | 4
-rw-r--r--  libcxx/include/array | 4
-rw-r--r--  libcxx/include/barrier | 4
-rw-r--r--  libcxx/include/deque | 2
-rw-r--r--  libcxx/include/forward_list | 2
-rw-r--r--  libcxx/include/future | 4
-rw-r--r--  libcxx/include/limits | 126
-rw-r--r--  libcxx/include/list | 2
-rw-r--r--  libcxx/include/map | 4
-rw-r--r--  libcxx/include/math.h | 8
-rw-r--r--  libcxx/include/module.modulemap | 10
-rw-r--r--  libcxx/include/new | 22
-rw-r--r--  libcxx/include/queue | 8
-rw-r--r--  libcxx/include/regex | 2
-rw-r--r--  libcxx/include/scoped_allocator | 4
-rw-r--r--  libcxx/include/set | 4
-rw-r--r--  libcxx/include/stack | 4
-rw-r--r--  libcxx/include/stdlib.h | 10
-rw-r--r--  libcxx/include/string | 2
-rw-r--r--  libcxx/include/string_view | 2
-rw-r--r--  libcxx/include/type_traits | 1
-rw-r--r--  libcxx/include/unordered_map | 4
-rw-r--r--  libcxx/include/unordered_set | 4
-rw-r--r--  libcxx/include/variant | 23
-rw-r--r--  libcxx/include/vector | 4
-rw-r--r--  libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp | 4
-rw-r--r--  libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp | 4
-rw-r--r--  libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp | 2
-rw-r--r--  libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp | 2
-rw-r--r--  libcxx/test/std/numerics/c.math/signbit.pass.cpp | 13
-rw-r--r--  libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp | 16
-rw-r--r--  libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp | 14
-rw-r--r--  libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp | 2
-rw-r--r--  libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp | 4
-rw-r--r--  libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp | 8
-rw-r--r--  libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp | 16
-rw-r--r--  libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp | 8
-rw-r--r--  libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp | 2
-rw-r--r--  libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp | 14
-rw-r--r--  libcxx/test/support/poisoned_hash_helper.h | 185
-rw-r--r--  libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp | 33
-rw-r--r--  lld/COFF/DLL.cpp | 52
-rw-r--r--  lld/COFF/DLL.h | 1
-rw-r--r--  lld/COFF/Driver.cpp | 1
-rw-r--r--  lld/COFF/InputFiles.cpp | 26
-rw-r--r--  lld/COFF/InputFiles.h | 4
-rw-r--r--  lld/COFF/SymbolTable.cpp | 7
-rw-r--r--  lld/COFF/SymbolTable.h | 3
-rw-r--r--  lld/COFF/Symbols.h | 12
-rw-r--r--  lld/COFF/Writer.cpp | 19
-rw-r--r--  lld/test/COFF/Inputs/loadconfig-arm64ec.s | 2
-rw-r--r--  lld/test/COFF/arm64ec-import.test | 31
-rw-r--r--  lldb/include/lldb/Core/SourceManager.h | 4
-rw-r--r--  lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 16
-rw-r--r--  lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h | 31
-rw-r--r--  lldb/test/API/lit.cfg.py | 5
-rw-r--r--  lldb/test/API/lit.site.cfg.py.in | 8
-rw-r--r--  lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp | 96
-rw-r--r--  lldb/test/Shell/helper/toolchain.py | 5
-rw-r--r--  lldb/test/Shell/lit.site.cfg.py.in | 9
-rw-r--r--  llvm/CMakeLists.txt | 4
-rw-r--r--  llvm/include/llvm/ADT/DenseMap.h | 37
-rw-r--r--  llvm/include/llvm/Analysis/CtxProfAnalysis.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/ValueTracking.h | 3
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsDirectX.td | 2
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1
-rw-r--r--  llvm/include/llvm/InitializePasses.h | 1
-rw-r--r--  llvm/include/llvm/LinkAllPasses.h | 1
-rw-r--r--  llvm/include/llvm/ProfileData/PGOCtxProfReader.h | 15
-rw-r--r--  llvm/include/llvm/SandboxIR/SandboxIR.h | 81
-rw-r--r--  llvm/include/llvm/SandboxIR/SandboxIRValues.def | 3
-rw-r--r--  llvm/include/llvm/Transforms/IPO/Attributor.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/InstCombine/InstCombiner.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/Scalarizer.h | 5
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 37
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 10
-rw-r--r--  llvm/lib/ProfileData/PGOCtxProfReader.cpp | 8
-rw-r--r--  llvm/lib/SandboxIR/SandboxIR.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 19
-rw-r--r--  llvm/lib/Target/DirectX/DXContainerGlobals.cpp | 56
-rw-r--r--  llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp | 5
-rw-r--r--  llvm/lib/Target/DirectX/DXILFinalizeLinkage.h | 1
-rw-r--r--  llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp | 31
-rw-r--r--  llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h | 1
-rw-r--r--  llvm/lib/Target/DirectX/DXILPrepare.cpp | 2
-rw-r--r--  llvm/lib/Target/DirectX/DirectXTargetMachine.cpp | 6
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.h | 2
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 52
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td | 26
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 35
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td | 12
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 24
-rw-r--r--  llvm/lib/Target/X86/X86ScheduleZnver4.td | 14
-rw-r--r--  llvm/lib/Transforms/Coroutines/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 298
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroInternal.h | 1
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 18
-rw-r--r--  llvm/lib/Transforms/Coroutines/Coroutines.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp | 308
-rw-r--r--  llvm/lib/Transforms/Coroutines/MaterializationUtils.h | 30
-rw-r--r--  llvm/lib/Transforms/Coroutines/SpillUtils.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Coroutines/SpillUtils.h | 2
-rw-r--r--  llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp | 12
-rw-r--r--  llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 17
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionImport.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 38
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalar.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalarizer.cpp | 39
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3
-rw-r--r--  llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll | 42
-rw-r--r--  llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll | 93
-rw-r--r--  llvm/test/CodeGen/DirectX/acos.ll | 25
-rw-r--r--  llvm/test/CodeGen/DirectX/asin.ll | 25
-rw-r--r--  llvm/test/CodeGen/DirectX/atan.ll | 25
-rw-r--r--  llvm/test/CodeGen/DirectX/ceil.ll | 21
-rw-r--r--  llvm/test/CodeGen/DirectX/cos.ll | 21
-rw-r--r--  llvm/test/CodeGen/DirectX/cosh.ll | 25
-rw-r--r--  llvm/test/CodeGen/DirectX/exp2.ll | 50
-rw-r--r--  llvm/test/CodeGen/DirectX/fabs.ll | 22
-rw-r--r--  llvm/test/CodeGen/DirectX/floor.ll | 25
-rw-r--r--  llvm/test/CodeGen/DirectX/isinf.ll | 24
-rw-r--r--  llvm/test/CodeGen/DirectX/llc-pipeline.ll | 24
-rw-r--r--  llvm/test/CodeGen/DirectX/reversebits.ll | 21
-rw-r--r--  llvm/test/CodeGen/DirectX/round.ll | 22
-rw-r--r--  llvm/test/CodeGen/DirectX/saturate.ll | 17
-rw-r--r--  llvm/test/CodeGen/DirectX/scalar-store.ll | 17
-rw-r--r--  llvm/test/CodeGen/DirectX/scalarize-two-calls.ll | 25
-rw-r--r--  llvm/test/CodeGen/DirectX/sin.ll | 44
-rw-r--r--  llvm/test/CodeGen/DirectX/sinh.ll | 25
-rw-r--r--  llvm/test/CodeGen/DirectX/sqrt.ll | 25
-rw-r--r--  llvm/test/CodeGen/DirectX/step.ll | 78
-rw-r--r--  llvm/test/CodeGen/DirectX/tan.ll | 21
-rw-r--r--  llvm/test/CodeGen/DirectX/tanh.ll | 21
-rw-r--r--  llvm/test/CodeGen/DirectX/trunc.ll | 21
-rw-r--r--  llvm/test/CodeGen/LoongArch/fp16-promote.ll | 198
-rw-r--r--  llvm/test/CodeGen/NVPTX/lower-alloca.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/double-fcmp-strict.ll | 96
-rw-r--r--  llvm/test/CodeGen/RISCV/float-fcmp-strict.ll | 48
-rw-r--r--  llvm/test/CodeGen/RISCV/half-fcmp-strict.ll | 96
-rw-r--r--  llvm/test/CodeGen/RISCV/xcvalu.ll | 4
-rw-r--r--  llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll | 33
-rw-r--r--  llvm/test/MC/RISCV/corev/XCValu-valid.s | 32
-rw-r--r--  llvm/test/MC/RISCV/csr-aliases.s | 26
-rw-r--r--  llvm/test/MC/RISCV/rv32ih-aliases-valid.s | 30
-rw-r--r--  llvm/test/MC/RISCV/rvf-aliases-valid.s | 3
-rw-r--r--  llvm/test/MC/RISCV/rvi-aliases-valid.s | 4
-rw-r--r--  llvm/test/MC/RISCV/rvzfinx-aliases-valid.s | 65
-rw-r--r--  llvm/test/Transforms/Coroutines/gh107139-split-in-scc.ll | 38
-rw-r--r--  llvm/test/Transforms/InstCombine/fmod.ll | 128
-rw-r--r--  llvm/test/Transforms/InstCombine/pow-1.ll | 24
-rw-r--r--  llvm/test/Transforms/InstCombine/rem.ll | 103
-rw-r--r--  llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll | 2
-rw-r--r--  llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll | 865
-rw-r--r--  llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll | 38
-rw-r--r--  llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s | 2
-rw-r--r--  llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s | 2
-rw-r--r--  llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s | 2
-rw-r--r--  llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s | 2
-rw-r--r--  llvm/test/tools/llvm-dwp/X86/type_dedup.test | 4
-rw-r--r--  llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s | 374
-rw-r--r--  llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 2
-rw-r--r--  llvm/tools/opt/optdriver.cpp | 1
-rw-r--r--  llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp | 1
-rw-r--r--  llvm/unittests/SandboxIR/SandboxIRTest.cpp | 66
-rw-r--r--  llvm/unittests/SandboxIR/TrackerTest.cpp | 37
-rw-r--r--  llvm/utils/gn/build/toolchain/target_flags.gni | 2
-rw-r--r--  llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn | 10
-rw-r--r--  llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn | 19
-rw-r--r--  llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2
-rw-r--r--  llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn | 1
-rw-r--r--  mlir/cmake/modules/MLIRDetectPythonEnv.cmake | 2
-rw-r--r--  mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp | 29
-rw-r--r--  mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 8
-rw-r--r--  mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp | 10
-rw-r--r--  mlir/test/CMakeLists.txt | 4
-rw-r--r--  mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir | 6
-rw-r--r--  mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 76
-rw-r--r--  utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1
298 files changed, 4928 insertions, 1801 deletions
diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 672dd75..925912d 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -442,11 +442,22 @@ jobs:
name: ${{ needs.prepare.outputs.release-binary-filename }}-attestation
path: ${{ needs.prepare.outputs.release-binary-filename }}.jsonl
+ - name: Checkout Release Scripts
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ with:
+ sparse-checkout: |
+ llvm/utils/release/github-upload-release.py
+ llvm/utils/git/requirements.txt
+ sparse-checkout-cone-mode: false
+
+ - name: Install Python Requirements
+ run: |
+ pip install --require-hashes -r ./llvm/utils/git/requirements.txt
+
- name: Upload Release
shell: bash
run: |
- sudo apt install python3-github
- ./llvm-project/llvm/utils/release/github-upload-release.py \
+ ./llvm/utils/release/github-upload-release.py \
--token ${{ github.token }} \
--release ${{ needs.prepare.outputs.release-version }} \
upload \
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 91955af..9865118 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -270,12 +270,12 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> {
};
namespace bolt {
-struct PseudoProbeDesc {
+struct ProfilePseudoProbeDesc {
std::vector<Hex64> GUID;
std::vector<Hex64> Hash;
std::vector<uint32_t> GUIDHashIdx; // Index of hash for that GUID in Hash
- bool operator==(const PseudoProbeDesc &Other) const {
+ bool operator==(const ProfilePseudoProbeDesc &Other) const {
// Only treat empty Desc as equal
return GUID.empty() && Other.GUID.empty() && Hash.empty() &&
Other.Hash.empty() && GUIDHashIdx.empty() &&
@@ -284,8 +284,8 @@ struct PseudoProbeDesc {
};
} // end namespace bolt
-template <> struct MappingTraits<bolt::PseudoProbeDesc> {
- static void mapping(IO &YamlIO, bolt::PseudoProbeDesc &PD) {
+template <> struct MappingTraits<bolt::ProfilePseudoProbeDesc> {
+ static void mapping(IO &YamlIO, bolt::ProfilePseudoProbeDesc &PD) {
YamlIO.mapRequired("gs", PD.GUID);
YamlIO.mapRequired("gh", PD.GUIDHashIdx);
YamlIO.mapRequired("hs", PD.Hash);
@@ -295,7 +295,7 @@ template <> struct MappingTraits<bolt::PseudoProbeDesc> {
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeDesc)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::ProfilePseudoProbeDesc)
namespace llvm {
namespace yaml {
@@ -304,7 +304,7 @@ namespace bolt {
struct BinaryProfile {
BinaryProfileHeader Header;
std::vector<BinaryFunctionProfile> Functions;
- PseudoProbeDesc PseudoProbeDesc;
+ ProfilePseudoProbeDesc PseudoProbeDesc;
};
} // namespace bolt
@@ -313,7 +313,7 @@ template <> struct MappingTraits<bolt::BinaryProfile> {
YamlIO.mapRequired("header", BP.Header);
YamlIO.mapRequired("functions", BP.Functions);
YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc,
- bolt::PseudoProbeDesc());
+ bolt::ProfilePseudoProbeDesc());
}
};
diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h
index bd5a86f..a6f0fd6 100644
--- a/bolt/include/bolt/Profile/YAMLProfileReader.h
+++ b/bolt/include/bolt/Profile/YAMLProfileReader.h
@@ -105,7 +105,7 @@ private:
yaml::bolt::BinaryProfile YamlBP;
/// Map a function ID from a YAML profile to a BinaryFunction object.
- std::vector<BinaryFunction *> YamlProfileToFunction;
+ DenseMap<uint32_t, BinaryFunction *> YamlProfileToFunction;
using FunctionSet = std::unordered_set<const BinaryFunction *>;
/// To keep track of functions that have a matched profile before the profile
@@ -162,8 +162,6 @@ private:
/// Update matched YAML -> BinaryFunction pair.
void matchProfileToFunction(yaml::bolt::BinaryFunctionProfile &YamlBF,
BinaryFunction &BF) {
- if (YamlBF.Id >= YamlProfileToFunction.size())
- YamlProfileToFunction.resize(YamlBF.Id + 1);
YamlProfileToFunction[YamlBF.Id] = &BF;
YamlBF.Used = true;
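The hunk above replaces a resized std::vector with an llvm::DenseMap keyed by the YAML function ID. DenseMap::lookup() returns a value-initialized result (a null BinaryFunction pointer here) for IDs that were never matched, which is what allows the explicit bounds checks to be dropped in the reader changes below. A minimal, self-contained C++ sketch of that access pattern, using a hypothetical stand-in type rather than the real BOLT classes:

    #include "llvm/ADT/DenseMap.h"
    #include <cstdint>

    struct BinaryFunction {};  // stand-in for bolt::BinaryFunction

    int main() {
      llvm::DenseMap<uint32_t, BinaryFunction *> YamlProfileToFunction;

      BinaryFunction BF;
      YamlProfileToFunction[42] = &BF;  // what matchProfileToFunction() does

      // lookup() yields nullptr for unmatched profile IDs, so no separate
      // "Id < size()" check is needed before testing the result.
      BinaryFunction *Matched = YamlProfileToFunction.lookup(42);   // &BF
      BinaryFunction *Unmatched = YamlProfileToFunction.lookup(7);  // nullptr
      return (Matched && !Unmatched) ? 0 : 1;
    }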
diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h
index aec6e47..d4d7217 100644
--- a/bolt/include/bolt/Profile/YAMLProfileWriter.h
+++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h
@@ -47,7 +47,7 @@ public:
convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
const InlineTreeDesc &InlineTree, uint64_t GUID);
- static std::tuple<yaml::bolt::PseudoProbeDesc, InlineTreeDesc>
+ static std::tuple<yaml::bolt::ProfilePseudoProbeDesc, InlineTreeDesc>
convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
static yaml::bolt::BinaryFunctionProfile
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 67ed320..3bd0950 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -238,9 +238,7 @@ bool YAMLProfileReader::parseFunctionProfile(
BB.setExecutionCount(YamlBB.ExecCount);
for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
- BinaryFunction *Callee = YamlCSI.DestId < YamlProfileToFunction.size()
- ? YamlProfileToFunction[YamlCSI.DestId]
- : nullptr;
+ BinaryFunction *Callee = YamlProfileToFunction.lookup(YamlCSI.DestId);
bool IsFunction = Callee ? true : false;
MCSymbol *CalleeSymbol = nullptr;
if (IsFunction)
@@ -707,7 +705,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
break;
}
}
- YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);
+ YamlProfileToFunction.reserve(YamlBP.Functions.size());
// Computes hash for binary functions.
if (opts::MatchProfileWithFunctionHash) {
@@ -760,12 +758,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
NormalizeByCalls = usesEvent("branches");
uint64_t NumUnused = 0;
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
- if (YamlBF.Id >= YamlProfileToFunction.size()) {
- // Such profile was ignored.
- ++NumUnused;
- continue;
- }
- if (BinaryFunction *BF = YamlProfileToFunction[YamlBF.Id])
+ if (BinaryFunction *BF = YamlProfileToFunction.lookup(YamlBF.Id))
parseFunctionProfile(*BF, YamlBF);
else
++NumUnused;
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index 44600c3..4437be4 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -81,9 +81,10 @@ YAMLProfileWriter::collectInlineTree(
return InlineTree;
}
-std::tuple<yaml::bolt::PseudoProbeDesc, YAMLProfileWriter::InlineTreeDesc>
+std::tuple<yaml::bolt::ProfilePseudoProbeDesc,
+ YAMLProfileWriter::InlineTreeDesc>
YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
- yaml::bolt::PseudoProbeDesc Desc;
+ yaml::bolt::ProfilePseudoProbeDesc Desc;
InlineTreeDesc InlineTree;
for (const MCDecodedPseudoProbeInlineTree &TopLev :
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index ec11725..e5e8aad 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -34,8 +34,8 @@
#
## Check that without --profile-write-pseudo-probes option, no pseudo probes are
## generated
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
-# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata
+# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT
# CHECK-NO-OPT-NOT: probes:
# CHECK-NO-OPT-NOT: inline_tree:
# CHECK-NO-OPT-NOT: pseudo_probe_desc:
diff --git a/bolt/test/X86/pseudoprobe-decoding-noinline.test b/bolt/test/X86/pseudoprobe-decoding-noinline.test
index 5dd6c2e..36a2fab 100644
--- a/bolt/test/X86/pseudoprobe-decoding-noinline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-noinline.test
@@ -1,6 +1,45 @@
# REQUIRES: system-linux
-# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt 2>&1 | FileCheck %s
+# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt --lite=0 --enable-bat 2>&1 | FileCheck %s
+# PREAGG: B X:0 #foo# 1 0
+# PREAGG: B X:0 #bar# 1 0
+# PREAGG: B X:0 #main# 1 0
+
+## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
+# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin %t.preagg PREAGG
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
+# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
+## Check pseudo-probes in BAT YAML profile (BOLTed binary)
+# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
+# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
+# CHECK-YAML: name: bar
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 9 } ]
+# CHECK-YAML: inline_tree: [ { } ]
+#
+# CHECK-YAML: name: foo
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 3 } ]
+# CHECK-YAML: inline_tree: [ { g: 2 } ]
+#
+# CHECK-YAML: name: main
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ]
+# CHECK-YAML: inline_tree: [ { g: 1 } ]
+#
+# CHECK-YAML: pseudo_probe_desc:
+# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0xDB956436E78DD5FA, 0x5CF8C24CDB18BDAC ]
+# CHECK-YAML-NEXT: gh: [ 2, 1, 0 ]
+# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ]
+#
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
+## generated
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata
+# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT
+# CHECK-NO-OPT-NOT: probes:
+# CHECK-NO-OPT-NOT: inline_tree:
+# CHECK-NO-OPT-NOT: pseudo_probe_desc:
;; Report of decoding input pseudo probe binaries
; CHECK: GUID: 6699318081062747564 Name: foo
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst
index 1884acd..95509ef 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst
@@ -19,20 +19,28 @@ Example:
.. code-block:: c++
struct Base {
- virtual void ~Base();
+ virtual ~Base();
+ int i;
};
struct Derived : public Base {};
- void foo() {
- Base *b = new Derived[10];
-
+ void foo(Base* b) {
b += 1;
// warning: pointer arithmetic on class that declares a virtual function can
// result in undefined behavior if the dynamic type differs from the
// pointer type
+ }
+
+ int bar(const Derived d[]) {
+ return d[1].i; // warning due to pointer arithmetic on polymorphic object
+ }
- delete[] static_cast<Derived*>(b);
+ // Making Derived final suppresses the warning
+ struct FinalDerived final : public Base {};
+
+ int baz(const FinalDerived d[]) {
+ return d[1].i; // no warning as FinalDerived is final
}
Options
@@ -47,17 +55,9 @@ Options
.. code-block:: c++
- void bar() {
- Base *b = new Base[10];
+ void bar(Base b[], Derived d[]) {
b += 1; // warning, as Base declares a virtual destructor
-
- delete[] b;
-
- Derived *d = new Derived[10]; // Derived overrides the destructor, and
- // declares no other virtual functions
d += 1; // warning only if IgnoreVirtualDeclarationsOnly is set to false
-
- delete[] d;
}
References
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 3dc04f6..6cf03d2 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4763,6 +4763,7 @@ def HLSLSaturate : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+
def HLSLSelect : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_select"];
let Attributes = [NoThrow, Const];
@@ -4775,6 +4776,12 @@ def HLSLSign : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLStep: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_step"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
// Builtins for XRay.
def XRayCustomEvent : Builtin {
let Spellings = ["__xray_customevent"];
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 8c69199..f780322 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5609,10 +5609,6 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">,
MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>;
def pipe : Flag<["-", "--"], "pipe">,
HelpText<"Use pipes between commands, when possible">;
-// Facebook T92898286
-def post_link_optimize : Flag<["--"], "post-link-optimize">,
- HelpText<"Apply post-link optimizations using BOLT">;
-// End Facebook T92898286
def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">;
def prebind : Flag<["-"], "prebind">;
def preload : Flag<["-"], "preload">;
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 6f9d050..223ac66 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -146,6 +146,8 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__riscv_cmodel_medlow");
else if (CodeModel == "medium")
Builder.defineMacro("__riscv_cmodel_medany");
+ else if (CodeModel == "large")
+ Builder.defineMacro("__riscv_cmodel_large");
StringRef ABIName = getABI();
if (ABIName == "ilp32f" || ABIName == "lp64f")
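After this change, building with -mcmodel=large on RISC-V predefines __riscv_cmodel_large, alongside the existing __riscv_cmodel_medlow and __riscv_cmodel_medany macros exercised by the Preprocessor test below. An illustrative C++ snippet (the helper function is hypothetical) showing how source code can branch on the selected code model:

    #include <cstdio>

    // Reports which RISC-V code model this translation unit was compiled for,
    // based on the mutually exclusive __riscv_cmodel_* predefined macros.
    static const char *riscv_code_model() {
    #if defined(__riscv_cmodel_large)
      return "large";
    #elif defined(__riscv_cmodel_medany)
      return "medany";
    #elif defined(__riscv_cmodel_medlow)
      return "medlow";
    #else
      return "not targeting RISC-V";
    #endif
    }

    int main() {
      std::printf("code model: %s\n", riscv_code_model());
      return 0;
    }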
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9950c06..27abeba 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18861,6 +18861,16 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
return SelectVal;
}
+ case Builtin::BI__builtin_hlsl_step: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+ E->getArg(1)->getType()->hasFloatingRepresentation() &&
+ "step operands must have a float representation");
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
+ ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
+ }
case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
return EmitRuntimeCall(CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 6e22680..a8aabca 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -81,6 +81,7 @@ public:
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
GENERATE_HLSL_INTRINSIC_FUNCTION(Sign, sign)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step)
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 608fdf2..b7ae0de 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -672,41 +672,12 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
}
- // Facebook T92898286
- if (Args.hasArg(options::OPT_post_link_optimize))
- CmdArgs.push_back("-q");
- // End Facebook T92898286
-
Args.AddAllArgs(CmdArgs, options::OPT_T);
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
- // Facebook T92898286
- if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename())
- return;
-
- const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv"));
- ArgStringList MoveCmdArgs;
- MoveCmdArgs.push_back(Output.getFilename());
- const char *PreBoltBin =
- Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt");
- MoveCmdArgs.push_back(PreBoltBin);
- C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
- MvExec, MoveCmdArgs, std::nullopt));
-
- ArgStringList BoltCmdArgs;
- const char *BoltExec =
- Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt"));
- BoltCmdArgs.push_back(PreBoltBin);
- BoltCmdArgs.push_back("-reorder-blocks=reverse");
- BoltCmdArgs.push_back("-update-debug-sections");
- BoltCmdArgs.push_back("-o");
- BoltCmdArgs.push_back(Output.getFilename());
- C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
- BoltExec, BoltCmdArgs, std::nullopt));
- // End Facebook T92898286
}
void tools::gnutools::Assembler::ConstructJob(Compilation &C,
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 7a1edd9..d08dcd3 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1718,6 +1718,39 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt)
float4 sqrt(float4);
//===----------------------------------------------------------------------===//
+// step builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T step(T x, T y)
+/// \brief Returns 1 if the x parameter is greater than or equal to the y
+/// parameter; otherwise, 0.
+/// \param x [in] The first floating-point value to compare.
+/// \param y [in] The second floating-point value to compare.
+///
+/// Step is based on the following formula: (x >= y) ? 1 : 0
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half step(half, half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half2 step(half2, half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half3 step(half3, half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half4 step(half4, half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float step(float, float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float2 step(float2, float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float3 step(float3, float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float4 step(float4, float4);
+
+//===----------------------------------------------------------------------===//
// tan builtins
//===----------------------------------------------------------------------===//
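The comment above defines step element-wise as (x >= y) ? 1 : 0 for half and float scalars and vectors; the builtin itself lowers to the llvm.dx.step / llvm.spv.step intrinsics wired up elsewhere in this patch. A tiny scalar C++ model of that documented formula, for illustration only:

    #include <cstdio>

    // Scalar model of the formula documented above: (x >= y) ? 1 : 0.
    // The HLSL builtin applies this per element to half/float vectors.
    static float step_model(float x, float y) { return x >= y ? 1.0f : 0.0f; }

    int main() {
      std::printf("%g %g\n", step_model(2.0f, 1.0f), step_model(0.5f, 1.0f));  // 1 0
      return 0;
    }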
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 4e44813..527718c 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1747,6 +1747,18 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().IntTy);
break;
}
+ case Builtin::BI__builtin_hlsl_step: {
+ if (SemaRef.checkArgCount(TheCall, 2))
+ return true;
+ if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+ return true;
+
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ // return type is the same as the input type
+ TheCall->setType(ArgTyA);
+ break;
+ }
// Note these are llvm builtins that we want to catch invalid intrinsic
// generation. Normal handling of these builitns will occur elsewhere.
case Builtin::BI__builtin_elementwise_bitreverse: {
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index d21b8cb..4d11f2a 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -9548,7 +9548,7 @@ static void DiagnoseNarrowingInInitList(Sema &S,
unsigned ConstRefDiagID, unsigned WarnDiagID) {
unsigned DiagID;
auto &L = S.getLangOpts();
- if (L.CPlusPlus11 &&
+ if (L.CPlusPlus11 && !L.HLSL &&
(!L.MicrosoftExt || L.isCompatibleWithMSVC(LangOptions::MSVC2015)))
DiagID = IsConstRef ? ConstRefDiagID : DefaultDiagID;
else
diff --git a/clang/test/AST/HLSL/vector-constructors.hlsl b/clang/test/AST/HLSL/vector-constructors.hlsl
index 905f11d..9161ad1 100644
--- a/clang/test/AST/HLSL/vector-constructors.hlsl
+++ b/clang/test/AST/HLSL/vector-constructors.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -ast-dump -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -ast-dump -o - %s | FileCheck %s
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
diff --git a/clang/test/ClangScanDeps/implicit-target.c b/clang/test/ClangScanDeps/implicit-target.c
new file mode 100644
index 0000000..cf757f9
--- /dev/null
+++ b/clang/test/ClangScanDeps/implicit-target.c
@@ -0,0 +1,31 @@
+// Check that we can detect an implicit target when clang is invoked as
+// <triple>-clang. Using an implicit triple requires that the target actually
+// is available, too.
+// REQUIRES: x86-registered-target
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+// Check that we can deduce this both when using a compilation database, and when using
+// a literal command line.
+
+// RUN: clang-scan-deps -format experimental-full -compilation-database %t/cdb.json | FileCheck %s
+
+// RUN: clang-scan-deps -format experimental-full -- x86_64-w64-mingw32-clang %t/source.c -o %t/source.o | FileCheck %s
+
+// CHECK: "-triple",
+// CHECK-NEXT: "x86_64-w64-windows-gnu",
+
+
+//--- cdb.json.in
+[
+ {
+ "directory": "DIR",
+ "command": "x86_64-w64-mingw32-clang -c DIR/source.c -o DIR/source.o",
+ "file": "DIR/source.c"
+ }
+]
+
+//--- source.c
+void func(void) {}
diff --git a/clang/test/CodeGenHLSL/builtins/step.hlsl b/clang/test/CodeGenHLSL/builtins/step.hlsl
new file mode 100644
index 0000000..442f493
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/step.hlsl
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,NATIVE_HALF \
+// RUN: -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN: -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,NATIVE_HALF \
+// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv
+
+// NATIVE_HALF: define [[FNATTRS]] half @
+// NATIVE_HALF: call half @llvm.[[TARGET]].step.f16(half
+// NO_HALF: call float @llvm.[[TARGET]].step.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_step_half(half p0, half p1)
+{
+ return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].step.v2f16(<2 x half>
+// NO_HALF: call <2 x float> @llvm.[[TARGET]].step.v2f32(<2 x float>
+// NATIVE_HALF: ret <2 x half> %hlsl.step
+// NO_HALF: ret <2 x float> %hlsl.step
+half2 test_step_half2(half2 p0, half2 p1)
+{
+ return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].step.v3f16(<3 x half>
+// NO_HALF: call <3 x float> @llvm.[[TARGET]].step.v3f32(<3 x float>
+// NATIVE_HALF: ret <3 x half> %hlsl.step
+// NO_HALF: ret <3 x float> %hlsl.step
+half3 test_step_half3(half3 p0, half3 p1)
+{
+ return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].step.v4f16(<4 x half>
+// NO_HALF: call <4 x float> @llvm.[[TARGET]].step.v4f32(<4 x float>
+// NATIVE_HALF: ret <4 x half> %hlsl.step
+// NO_HALF: ret <4 x float> %hlsl.step
+half4 test_step_half4(half4 p0, half4 p1)
+{
+ return step(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] float @
+// CHECK: call float @llvm.[[TARGET]].step.f32(float
+// CHECK: ret float
+float test_step_float(float p0, float p1)
+{
+ return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <2 x float> @
+// CHECK: %hlsl.step = call <2 x float> @llvm.[[TARGET]].step.v2f32(
+// CHECK: ret <2 x float> %hlsl.step
+float2 test_step_float2(float2 p0, float2 p1)
+{
+ return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <3 x float> @
+// CHECK: %hlsl.step = call <3 x float> @llvm.[[TARGET]].step.v3f32(
+// CHECK: ret <3 x float> %hlsl.step
+float3 test_step_float3(float3 p0, float3 p1)
+{
+ return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <4 x float> @
+// CHECK: %hlsl.step = call <4 x float> @llvm.[[TARGET]].step.v4f32(
+// CHECK: ret <4 x float> %hlsl.step
+float4 test_step_float4(float4 p0, float4 p1)
+{
+ return step(p0, p1);
+}
diff --git a/clang/test/Driver/cl-link.c b/clang/test/Driver/cl-link.c
index f526044..9bf8a81 100644
--- a/clang/test/Driver/cl-link.c
+++ b/clang/test/Driver/cl-link.c
@@ -13,17 +13,17 @@
// ASAN: link.exe
// ASAN: "-debug"
// ASAN: "-incremental:no"
-// ASAN: "{{[^"]*}}clang_rt.asan_dynamic.lib"
-// ASAN: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk.lib"
+// ASAN: "{{[^"]*}}clang_rt.asan_dynamic{{(-i386)?}}.lib"
+// ASAN: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk{{(-i386)?}}.lib"
// ASAN: "{{.*}}cl-link{{.*}}.obj"
// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s
// ASAN-MD: link.exe
// ASAN-MD: "-debug"
// ASAN-MD: "-incremental:no"
-// ASAN-MD: "{{.*}}clang_rt.asan_dynamic.lib"
+// ASAN-MD: "{{.*}}clang_rt.asan_dynamic{{(-i386)?}}.lib"
// ASAN-MD: "-include:___asan_seh_interceptor"
-// ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib"
+// ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk{{(-i386)?}}.lib"
// ASAN-MD: "{{.*}}cl-link{{.*}}.obj"
// RUN: %clang_cl /LD -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
@@ -37,8 +37,8 @@
// ASAN-DLL: "-dll"
// ASAN-DLL: "-debug"
// ASAN-DLL: "-incremental:no"
-// ASAN-DLL: "{{.*}}clang_rt.asan_dynamic.lib"
-// ASAN-DLL: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk.lib"
+// ASAN-DLL: "{{.*}}clang_rt.asan_dynamic{{(-i386)?}}.lib"
+// ASAN-DLL: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk{{(-i386)?}}.lib"
// ASAN-DLL: "{{.*}}cl-link{{.*}}.obj"
// RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s
diff --git a/clang/test/Driver/windows-cross.c b/clang/test/Driver/windows-cross.c
index f6e831f..096358d 100644
--- a/clang/test/Driver/windows-cross.c
+++ b/clang/test/Driver/windows-cross.c
@@ -64,7 +64,7 @@
// RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-ADDRESS-EXE-X86
// CHECK-SANITIZE-ADDRESS-EXE-X86: "-fsanitize=address"
-// CHECK-SANITIZE-ADDRESS-EXE-X86: "{{.*}}clang_rt.asan_dynamic.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" "--undefined" "___asan_seh_interceptor"
+// CHECK-SANITIZE-ADDRESS-EXE-X86: "{{.*}}clang_rt.asan_dynamic{{(-i386)?}}.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk{{(-i386)?}}.lib" "--undefined" "___asan_seh_interceptor"
// RUN: not %clang -### --target=armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -shared -o shared.dll -fsanitize=tsan -x c++ %s 2>&1 \
// RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-TSAN
diff --git a/clang/test/Preprocessor/riscv-cmodel.c b/clang/test/Preprocessor/riscv-cmodel.c
index 45b9a93..0a531c7 100644
--- a/clang/test/Preprocessor/riscv-cmodel.c
+++ b/clang/test/Preprocessor/riscv-cmodel.c
@@ -15,6 +15,7 @@
// CHECK-MEDLOW: #define __riscv_cmodel_medlow 1
// CHECK-MEDLOW-NOT: __riscv_cmodel_medany
+// CHECK-MEDLOW-NOT: __riscv_cmodel_large
// RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32i -x c -E -dM %s \
// RUN: -mcmodel=medium -o - | FileCheck --check-prefix=CHECK-MEDANY %s
@@ -28,3 +29,11 @@
// CHECK-MEDANY: #define __riscv_cmodel_medany 1
// CHECK-MEDANY-NOT: __riscv_cmodel_medlow
+// CHECK-MEDANY-NOT: __riscv_cmodel_large
+
+// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i -fno-pic -x c -E -dM %s \
+// RUN: -mcmodel=large -o - | FileCheck --check-prefix=CHECK-LARGE %s
+
+// CHECK-LARGE: #define __riscv_cmodel_large 1
+// CHECK-LARGE-NOT: __riscv_cmodel_medlow
+// CHECK-LARGE-NOT: __riscv_cmodel_medany
diff --git a/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
new file mode 100644
index 0000000..8235852
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
+
+void test_too_few_arg()
+{
+ return __builtin_hlsl_step();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+void test_too_many_arg(float2 p0)
+{
+ return __builtin_hlsl_step(p0, p0, p0);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_step_int_to_float_promotion(int p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_step_int2_to_float2_promotion(int2 p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
diff --git a/clang/tools/clang-scan-deps/CMakeLists.txt b/clang/tools/clang-scan-deps/CMakeLists.txt
index f0be6a5..10bc0ff 100644
--- a/clang/tools/clang-scan-deps/CMakeLists.txt
+++ b/clang/tools/clang-scan-deps/CMakeLists.txt
@@ -1,4 +1,5 @@
set(LLVM_LINK_COMPONENTS
+ ${LLVM_TARGETS_TO_BUILD}
Core
Option
Support
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 259058c..1db7245 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -15,6 +15,7 @@
#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
#include "clang/Tooling/JSONCompilationDatabase.h"
+#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
@@ -24,6 +25,7 @@
#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/Timer.h"
@@ -795,6 +797,7 @@ getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) {
}
int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
+ llvm::InitializeAllTargetInfos();
std::string ErrorMessage;
std::unique_ptr<tooling::CompilationDatabase> Compilations =
getCompilationDatabase(argc, argv, ErrorMessage);
@@ -810,6 +813,8 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
Compilations = expandResponseFiles(std::move(Compilations),
llvm::vfs::getRealFileSystem());
+ Compilations = inferTargetAndDriverMode(std::move(Compilations));
+
// The command options are rewritten to run Clang in preprocessor only mode.
auto AdjustingCompilations =
std::make_unique<tooling::ArgumentsAdjustingCompilations>(
diff --git a/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp b/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
index 70005da..2d67b6b 100644
--- a/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
+++ b/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
@@ -89,13 +89,13 @@ struct CasedTypeInfo {
class ASTPropsEmitter {
raw_ostream &Out;
- RecordKeeper &Records;
+ const RecordKeeper &Records;
std::map<HasProperties, NodeInfo> NodeInfos;
std::vector<PropertyType> AllPropertyTypes;
std::map<PropertyType, CasedTypeInfo> CasedTypeInfos;
public:
- ASTPropsEmitter(RecordKeeper &records, raw_ostream &out)
+ ASTPropsEmitter(const RecordKeeper &records, raw_ostream &out)
: Out(out), Records(records) {
// Find all the properties.
@@ -587,28 +587,28 @@ void ASTPropsEmitter::emitWriteOfProperty(StringRef writerName,
/// Emit an .inc file that defines the AbstractFooReader class
/// for the given AST class hierarchy.
template <class NodeClass>
-static void emitASTReader(RecordKeeper &records, raw_ostream &out,
+static void emitASTReader(const RecordKeeper &records, raw_ostream &out,
StringRef description) {
emitSourceFileHeader(description, out, records);
ASTPropsEmitter(records, out).emitNodeReaderClass<NodeClass>();
}
-void clang::EmitClangTypeReader(RecordKeeper &records, raw_ostream &out) {
+void clang::EmitClangTypeReader(const RecordKeeper &records, raw_ostream &out) {
emitASTReader<TypeNode>(records, out, "A CRTP reader for Clang Type nodes");
}
/// Emit an .inc file that defines the AbstractFooWriter class
/// for the given AST class hierarchy.
template <class NodeClass>
-static void emitASTWriter(RecordKeeper &records, raw_ostream &out,
+static void emitASTWriter(const RecordKeeper &records, raw_ostream &out,
StringRef description) {
emitSourceFileHeader(description, out, records);
ASTPropsEmitter(records, out).emitNodeWriterClass<NodeClass>();
}
-void clang::EmitClangTypeWriter(RecordKeeper &records, raw_ostream &out) {
+void clang::EmitClangTypeWriter(const RecordKeeper &records, raw_ostream &out) {
emitASTWriter<TypeNode>(records, out, "A CRTP writer for Clang Type nodes");
}
@@ -847,7 +847,8 @@ void ASTPropsEmitter::emitBasicReaderWriterFile(const ReaderWriterInfo &info) {
/// Emit an .inc file that defines some helper classes for reading
/// basic values.
-void clang::EmitClangBasicReader(RecordKeeper &records, raw_ostream &out) {
+void clang::EmitClangBasicReader(const RecordKeeper &records,
+ raw_ostream &out) {
emitSourceFileHeader("Helper classes for BasicReaders", out, records);
// Use any property, we won't be using those properties.
@@ -857,7 +858,8 @@ void clang::EmitClangBasicReader(RecordKeeper &records, raw_ostream &out) {
/// Emit an .inc file that defines some helper classes for writing
/// basic values.
-void clang::EmitClangBasicWriter(RecordKeeper &records, raw_ostream &out) {
+void clang::EmitClangBasicWriter(const RecordKeeper &records,
+ raw_ostream &out) {
emitSourceFileHeader("Helper classes for BasicWriters", out, records);
// Use any property, we won't be using those properties.
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index c0582e3..01d16d2 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -35,11 +35,15 @@ void EmitClangDeclContext(const llvm::RecordKeeper &RK, llvm::raw_ostream &OS);
void EmitClangASTNodes(const llvm::RecordKeeper &RK, llvm::raw_ostream &OS,
const std::string &N, const std::string &S,
std::string_view PriorizeIfSubclassOf = "");
-void EmitClangBasicReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangBasicWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitClangBasicReader(const llvm::RecordKeeper &Records,
+ llvm::raw_ostream &OS);
+void EmitClangBasicWriter(const llvm::RecordKeeper &Records,
+ llvm::raw_ostream &OS);
void EmitClangTypeNodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangTypeReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
-void EmitClangTypeWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitClangTypeReader(const llvm::RecordKeeper &Records,
+ llvm::raw_ostream &OS);
+void EmitClangTypeWriter(const llvm::RecordKeeper &Records,
+ llvm::raw_ostream &OS);
void EmitClangAttrParserStringSwitches(const llvm::RecordKeeper &Records,
llvm::raw_ostream &OS);
void EmitClangAttrSubjectMatchRulesParserStringSwitches(
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 5a97992..286a622 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -81,6 +81,8 @@ if("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang[+]*$")
set(COMPILER_RT_TEST_COMPILER_ID Clang)
elseif("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang.*.exe$")
set(COMPILER_RT_TEST_COMPILER_ID Clang)
+elseif("${COMPILER_RT_TEST_COMPILER}" MATCHES "cl.exe$")
+ set(COMPILER_RT_TEST_COMPILER_ID MSVC)
else()
set(COMPILER_RT_TEST_COMPILER_ID GNU)
endif()
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc
index 91be9e9..66744aa 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc
@@ -22,6 +22,7 @@ INTERFACE_FUNCTION(__sanitizer_verify_double_ended_contiguous_container)
INTERFACE_WEAK_FUNCTION(__sanitizer_on_print)
INTERFACE_WEAK_FUNCTION(__sanitizer_report_error_summary)
INTERFACE_WEAK_FUNCTION(__sanitizer_sandbox_on_notify)
+INTERFACE_WEAK_FUNCTION(__sanitizer_get_dtls_size)
// Sanitizer weak hooks
INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_memcmp)
INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_strcmp)
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h
index cd0d45e..c424ab1 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h
@@ -49,6 +49,11 @@ __sanitizer_sandbox_on_notify(__sanitizer_sandbox_arguments *args);
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
__sanitizer_report_error_summary(const char *error_summary);
+// Returns size of dynamically allocated block. This function can be overridden
+// by the client.
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE __sanitizer::uptr
+__sanitizer_get_dtls_size(const void *tls_begin);
+
SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump();
SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(
const __sanitizer::uptr *pcs, const __sanitizer::uptr len);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
index e5839f6..5e9a787 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
@@ -110,15 +110,16 @@ SANITIZER_WEAK_ATTRIBUTE
const void *__sanitizer_get_allocated_begin(const void *p);
}
-static bool GetDTLSRange(uptr &tls_beg, uptr &tls_size) {
- const void *start = __sanitizer_get_allocated_begin((void *)tls_beg);
+SANITIZER_INTERFACE_WEAK_DEF(uptr, __sanitizer_get_dtls_size,
+ const void *tls_begin) {
+ const void *start = __sanitizer_get_allocated_begin(tls_begin);
if (!start)
- return false;
- tls_beg = (uptr)start;
- tls_size = __sanitizer_get_allocated_size(start);
+ return 0;
+ CHECK_EQ(start, tls_begin);
+ uptr tls_size = __sanitizer_get_allocated_size(start);
VReport(2, "__tls_get_addr: glibc DTLS suspected; tls={%p,0x%zx}\n",
- (void *)tls_beg, tls_size);
- return true;
+ tls_begin, tls_size);
+ return tls_size;
}
DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
@@ -142,10 +143,12 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
// creation.
VReport(2, "__tls_get_addr: static tls: %p\n", (void *)tls_beg);
tls_size = 0;
- } else if (!GetDTLSRange(tls_beg, tls_size)) {
- VReport(2, "__tls_get_addr: Can't guess glibc version\n");
- // This may happen inside the DTOR of main thread, so just ignore it.
- tls_size = 0;
+ } else {
+ tls_size = __sanitizer_get_dtls_size(reinterpret_cast<void *>(tls_beg));
+ if (!tls_size) {
+ VReport(2, "__tls_get_addr: Can't guess glibc version\n");
+ // This may happen inside the DTOR of main thread, so just ignore it.
+ }
}
dtv->beg = tls_beg;
dtv->size = tls_size;
@@ -160,6 +163,9 @@ bool DTLSInDestruction(DTLS *dtls) {
}
#else
+SANITIZER_INTERFACE_WEAK_DEF(uptr, __sanitizer_get_dtls_size, const void *) {
+ return 0;
+}
DTLS::DTV *DTLS_on_tls_get_addr(void *arg, void *res,
unsigned long, unsigned long) { return 0; }
DTLS *DTLS_Get() { return 0; }
diff --git a/compiler-rt/lib/sanitizer_common/weak_symbols.txt b/compiler-rt/lib/sanitizer_common/weak_symbols.txt
index 1eb1ce8..77e7b5d 100644
--- a/compiler-rt/lib/sanitizer_common/weak_symbols.txt
+++ b/compiler-rt/lib/sanitizer_common/weak_symbols.txt
@@ -1,4 +1,5 @@
___sanitizer_free_hook
+___sanitizer_get_dtls_size
___sanitizer_malloc_hook
___sanitizer_report_error_summary
___sanitizer_sandbox_on_notify
diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py
index 1c6fbc8..c533c7e 100644
--- a/compiler-rt/test/lit.common.cfg.py
+++ b/compiler-rt/test/lit.common.cfg.py
@@ -148,6 +148,9 @@ if compiler_id == "Clang":
# requested it because it makes ASan reports more precise.
config.debug_info_flags.append("-gcodeview")
config.debug_info_flags.append("-gcolumn-info")
+elif compiler_id == "MSVC":
+ config.debug_info_flags = ["/Z7"]
+ config.cxx_mode_flags = []
elif compiler_id == "GNU":
config.cxx_mode_flags = ["-x c++"]
config.debug_info_flags = ["-g"]
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c
new file mode 100644
index 0000000..4ec129f
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c
@@ -0,0 +1,52 @@
+// RUN: %clang -g %s -o %t
+// RUN: %clang -g %s -DBUILD_SO -fPIC -o %t-so.so -shared
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// REQUIRES: glibc
+
+// `__tls_get_addr` is somehow not invoked.
+// XFAIL: i386-linux
+
+// These don't intercept __tls_get_addr.
+// XFAIL: lsan,hwasan,ubsan
+
+#ifndef BUILD_SO
+# include <assert.h>
+# include <dlfcn.h>
+# include <pthread.h>
+# include <stdio.h>
+# include <stdlib.h>
+
+// CHECK-COUNT-2: __sanitizer_get_dtls_size:
+size_t __sanitizer_get_dtls_size(const void *ptr) {
+ fprintf(stderr, "__sanitizer_get_dtls_size: %p\n", ptr);
+ return 0;
+}
+
+typedef long *(*get_t)();
+get_t GetTls;
+void *Thread(void *unused) { return GetTls(); }
+
+int main(int argc, char *argv[]) {
+ char path[4096];
+ snprintf(path, sizeof(path), "%s-so.so", argv[0]);
+ int i;
+
+ void *handle = dlopen(path, RTLD_LAZY);
+ if (!handle)
+ fprintf(stderr, "%s\n", dlerror());
+ assert(handle != 0);
+ GetTls = (get_t)dlsym(handle, "GetTls");
+ assert(dlerror() == 0);
+
+ pthread_t t;
+ pthread_create(&t, 0, Thread, 0);
+ pthread_join(t, 0);
+ pthread_create(&t, 0, Thread, 0);
+ pthread_join(t, 0);
+ return 0;
+}
+#else // BUILD_SO
+__thread long huge_thread_local_array[1 << 17];
+long *GetTls() { return &huge_thread_local_array[0]; }
+#endif
diff --git a/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c b/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c
index 0228c3b..7b5b9cf 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c
+++ b/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c
@@ -2,6 +2,8 @@
// FIXME: TSAN does not use DlsymAlloc.
// UNSUPPORTED: tsan
+// FIXME: investigate why this fails on macOS
+// UNSUPPORTED: darwin
#include <stdlib.h>
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index 232d981..9935fe6 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -81,13 +81,7 @@ if is_msvc:
# use_clang() and use_lld() respectively, so set them to "", if needed.
if not hasattr(config, "clang_src_dir"):
config.clang_src_dir = ""
-# Facebook T92898286
-should_test_bolt = get_required_attr(config, "llvm_test_bolt")
-if should_test_bolt:
- llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"])
-else:
- llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
-# End Facebook T92898286
+llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
if not hasattr(config, "lld_src_dir"):
config.lld_src_dir = ""
@@ -300,9 +294,3 @@ llvm_config.feature_config([("--build-mode", {"Debug|RelWithDebInfo": "debug-inf
# Allow 'REQUIRES: XXX-registered-target' in tests.
for arch in config.targets_to_build:
config.available_features.add(arch.lower() + "-registered-target")
-
-# Facebook T92898286
-# Ensure the user's PYTHONPATH is included.
-if "PYTHONPATH" in os.environ:
- config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"]
-# End Facebook T92898286
diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in
index 2d53cd3..39458df 100644
--- a/cross-project-tests/lit.site.cfg.py.in
+++ b/cross-project-tests/lit.site.cfg.py.in
@@ -21,10 +21,6 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
-# Facebook T92898286
-config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
-# End Facebook T92898286
-
import lit.llvm
lit.llvm.initialize(lit_config, config)
diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h
index b1304d7..c90c8c4 100644
--- a/flang/include/flang/Semantics/expression.h
+++ b/flang/include/flang/Semantics/expression.h
@@ -331,7 +331,7 @@ private:
const semantics::Scope &, bool C919bAlreadyEnforced = false);
MaybeExpr CompleteSubscripts(ArrayRef &&);
MaybeExpr ApplySubscripts(DataRef &&, std::vector<Subscript> &&);
- void CheckConstantSubscripts(ArrayRef &);
+ void CheckSubscripts(ArrayRef &);
bool CheckRanks(const DataRef &); // Return false if error exists.
bool CheckPolymorphic(const DataRef &); // ditto
bool CheckDataRef(const DataRef &); // ditto
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index 876c2ae..166dae9 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -2264,7 +2264,7 @@ std::optional<SpecificCall> IntrinsicInterface::Match(
messages.Say("'kind=' argument must be a constant scalar integer "
"whose value is a supported kind for the "
"intrinsic result type"_err_en_US);
- return std::nullopt;
+ // use default kind below for error recovery
} else if (kindDummyArg->flags.test(ArgFlag::defaultsToSameKind)) {
CHECK(sameArg);
resultType = *sameArg->GetType();
@@ -2274,6 +2274,8 @@ std::optional<SpecificCall> IntrinsicInterface::Match(
DynamicType{TypeCategory::Integer, defaults.sizeIntegerKind()};
} else {
CHECK(kindDummyArg->flags.test(ArgFlag::defaultsToDefaultForResult));
+ }
+ if (!resultType) {
int kind{defaults.GetDefaultKind(*category)};
if (*category == TypeCategory::Character) { // ACHAR & CHAR
resultType = DynamicType{kind, 1};
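
The intrinsics.cpp hunk above changes the unsupported 'kind=' case from returning std::nullopt to reporting the error and falling through, so the new `if (!resultType)` block supplies the default kind and analysis can continue (the behavior exercised by the kinds06.f90 test added later in this patch). A standalone sketch of that recovery pattern follows, using illustrative names rather than flang's own:

#include <iostream>
#include <optional>

// Illustrative only: report an invalid 'kind=' but recover with a default so
// a single bad argument does not abort the whole intrinsic match.
std::optional<int> ResolveKind(std::optional<int> requested, int defaultKind) {
  std::optional<int> result;
  if (requested && (*requested == 4 || *requested == 8)) {
    result = *requested; // supported kind
  } else if (requested) {
    std::cerr << "error: unsupported 'kind=' value; using default for recovery\n";
    // leave result unset and fall through to the default below
  }
  if (!result) {
    result = defaultKind;
  }
  return result;
}

int main() { std::cout << *ResolveKind(666, 4) << '\n'; } // prints 4 after the error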
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index f336d21..fa8a430 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -1009,6 +1009,18 @@ bool ClauseProcessor::processMap(
return clauseFound;
}
+bool ClauseProcessor::processNontemporal(
+ mlir::omp::NontemporalClauseOps &result) const {
+ return findRepeatableClause<omp::clause::Nontemporal>(
+ [&](const omp::clause::Nontemporal &clause, const parser::CharBlock &) {
+ for (const Object &object : clause.v) {
+ semantics::Symbol *sym = object.sym();
+ mlir::Value symVal = converter.getSymbolAddress(*sym);
+ result.nontemporalVars.push_back(symVal);
+ }
+ });
+}
+
bool ClauseProcessor::processReduction(
mlir::Location currentLocation, mlir::omp::ReductionClauseOps &result,
llvm::SmallVectorImpl<mlir::Type> *outReductionTypes,
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 8d02d36..be1d8a6 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -121,6 +121,7 @@ public:
llvm::SmallVectorImpl<const semantics::Symbol *> *mapSyms = nullptr,
llvm::SmallVectorImpl<mlir::Location> *mapSymLocs = nullptr,
llvm::SmallVectorImpl<mlir::Type> *mapSymTypes = nullptr) const;
+ bool processNontemporal(mlir::omp::NontemporalClauseOps &result) const;
bool processReduction(
mlir::Location currentLocation, mlir::omp::ReductionClauseOps &result,
llvm::SmallVectorImpl<mlir::Type> *reductionTypes = nullptr,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 233aacb..99114dc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1123,13 +1123,13 @@ static void genSimdClauses(lower::AbstractConverter &converter,
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAligned(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps);
+ cp.processNontemporal(clauseOps);
cp.processOrder(clauseOps);
cp.processReduction(loc, clauseOps);
cp.processSafelen(clauseOps);
cp.processSimdlen(clauseOps);
- cp.processTODO<clause::Linear, clause::Nontemporal>(
- loc, llvm::omp::Directive::OMPD_simd);
+ cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd);
}
static void genSingleClauses(lower::AbstractConverter &converter,
diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp
index c7ec873..71d1c08 100644
--- a/flang/lib/Semantics/check-call.cpp
+++ b/flang/lib/Semantics/check-call.cpp
@@ -1363,6 +1363,14 @@ static bool CheckElementalConformance(parser::ContextualMessages &messages,
const auto &dummy{proc.dummyArguments.at(index++)};
if (arg) {
if (const auto *expr{arg->UnwrapExpr()}) {
+ if (const auto *wholeSymbol{evaluate::UnwrapWholeSymbolDataRef(arg)}) {
+ wholeSymbol = &ResolveAssociations(*wholeSymbol);
+ if (IsAssumedSizeArray(*wholeSymbol)) {
+ evaluate::SayWithDeclaration(messages, *wholeSymbol,
+ "Whole assumed-size array '%s' may not be used as an argument to an elemental procedure"_err_en_US,
+ wholeSymbol->name());
+ }
+ }
if (auto argShape{evaluate::GetShape(context, *expr)}) {
if (GetRank(*argShape) > 0) {
std::string argName{"actual argument ("s + expr->AsFortran() +
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index e94a49f..072ebe1 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -298,7 +298,7 @@ MaybeExpr ExpressionAnalyzer::CompleteSubscripts(ArrayRef &&ref) {
// Subscripts of named constants are checked in folding.
// Subscripts of DATA statement objects are checked in data statement
// conversion to initializers.
- CheckConstantSubscripts(ref);
+ CheckSubscripts(ref);
}
return Designate(DataRef{std::move(ref)});
}
@@ -326,7 +326,7 @@ MaybeExpr ExpressionAnalyzer::ApplySubscripts(
std::move(dataRef.u));
}
-void ExpressionAnalyzer::CheckConstantSubscripts(ArrayRef &ref) {
+void ExpressionAnalyzer::CheckSubscripts(ArrayRef &ref) {
// Fold subscript expressions and check for an empty triplet.
const Symbol &arraySymbol{ref.base().GetLastSymbol()};
Shape lb{GetLBOUNDs(foldingContext_, NamedEntity{arraySymbol})};
@@ -390,6 +390,13 @@ void ExpressionAnalyzer::CheckConstantSubscripts(ArrayRef &ref) {
for (Subscript &ss : ref.subscript()) {
auto dimLB{ToInt64(lb[dim])};
auto dimUB{ToInt64(ub[dim])};
+ if (dimUB && dimLB && *dimUB < *dimLB) {
+ AttachDeclaration(
+ Say("Empty array dimension %d cannot be subscripted as an element or non-empty array section"_err_en_US,
+ dim + 1),
+ arraySymbol);
+ break;
+ }
std::optional<ConstantSubscript> val[2];
int vals{0};
if (auto *triplet{std::get_if<Triplet>(&ss.u)}) {
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 61b070b..2cee35e 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -54,6 +54,10 @@ static RT_API_ATTRS bool CheckCompleteListDirectedField(
}
}
+static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) {
+ return edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','};
+}
+
template <int LOG2_BASE>
static RT_API_ATTRS bool EditBOZInput(
IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
@@ -70,6 +74,7 @@ static RT_API_ATTRS bool EditBOZInput(
// Count significant digits after any leading white space & zeroes
int digits{0};
int significantBits{0};
+ const char32_t comma{GetSeparatorChar(edit)};
for (; next; next = io.NextInField(remaining, edit)) {
char32_t ch{*next};
if (ch == ' ' || ch == '\t') {
@@ -84,7 +89,7 @@ static RT_API_ATTRS bool EditBOZInput(
} else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
} else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
} else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
- } else if (ch == ',') {
+ } else if (ch == comma) {
break; // end non-list-directed field early
} else {
io.GetIoErrorHandler().SignalError(
@@ -209,6 +214,7 @@ RT_API_ATTRS bool EditIntegerInput(
common::UnsignedInt128 value{0};
bool any{!!sign};
bool overflow{false};
+ const char32_t comma{GetSeparatorChar(edit)};
for (; next; next = io.NextInField(remaining, edit)) {
char32_t ch{*next};
if (ch == ' ' || ch == '\t') {
@@ -221,9 +227,23 @@ RT_API_ATTRS bool EditIntegerInput(
int digit{0};
if (ch >= '0' && ch <= '9') {
digit = ch - '0';
- } else if (ch == ',') {
+ } else if (ch == comma) {
break; // end non-list-directed field early
} else {
+ if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) {
+ // Ignore any fractional part that might appear in NAMELIST integer
+ // input, like a few other Fortran compilers do.
+ // TODO: also process exponents? Some compilers do, but they obviously
+ // can't just be ignored.
+ while ((next = io.NextInField(remaining, edit))) {
+ if (*next < '0' || *next > '9') {
+ break;
+ }
+ }
+ if (!next || *next == comma) {
+ break;
+ }
+ }
io.GetIoErrorHandler().SignalError(
"Bad character '%lc' in INTEGER input field", ch);
return false;
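
Two behaviors change in edit-input.cpp above: the character that ends a numeric field early now depends on decimal-comma mode (GetSeparatorChar), and NAMELIST integer input tolerates a fractional part by skipping its digits. The sketch below reproduces both outside the flang runtime purely for illustration; it is not the runtime's actual parsing loop.

#include <cctype>
#include <iostream>
#include <string>

// Sketch: parse an integer field the way the patched runtime treats NAMELIST
// input. With decimal-comma editing, ';' separates fields and ',' is the
// radix point; a fractional part is skipped instead of rejected.
long ReadNamelistInt(const std::string &field, bool decimalComma) {
  const char sep = decimalComma ? ';' : ',';
  const char radix = decimalComma ? ',' : '.';
  long value = 0;
  for (std::size_t i = 0; i < field.size(); ++i) {
    const char c = field[i];
    if (std::isdigit(static_cast<unsigned char>(c))) {
      value = value * 10 + (c - '0');
    } else if (c == sep) {
      break; // end of this field
    } else if (c == radix) {
      while (i + 1 < field.size() &&
             std::isdigit(static_cast<unsigned char>(field[i + 1]))) {
        ++i; // ignore the fractional digits
      }
    } else {
      std::cerr << "bad character '" << c << "' in INTEGER input field\n";
      return -1;
    }
  }
  return value;
}

int main() {
  std::cout << ReadNamelistInt("123.456", /*decimalComma=*/false) << '\n'; // 123
  std::cout << ReadNamelistInt("123,456;", /*decimalComma=*/true) << '\n'; // 123
}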
diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90
index 2127451..bdc6a1e 100644
--- a/flang/test/Lower/OpenMP/simd.f90
+++ b/flang/test/Lower/OpenMP/simd.f90
@@ -223,3 +223,21 @@ subroutine simdloop_aligned_allocatable()
A(i) = i
end do
end subroutine
+
+!CHECK-LABEL: func @_QPsimd_with_nontemporal_clause
+subroutine simd_with_nontemporal_clause(n)
+ !CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsimd_with_nontemporal_clauseEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ !CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsimd_with_nontemporal_clauseEc"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ integer :: i, n
+ integer :: A, B, C
+ !CHECK: %[[LB:.*]] = arith.constant 1 : i32
+ !CHECK: %[[UB:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
+ !CHECK: %[[STEP:.*]] = arith.constant 1 : i32
+ !CHECK: omp.simd nontemporal(%[[A_DECL]]#1, %[[C_DECL]]#1 : !fir.ref<i32>, !fir.ref<i32>) {
+ !CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+ !$OMP SIMD NONTEMPORAL(A, C)
+ do i = 1, n
+ C = A + B
+ end do
+ !$OMP END SIMD
+end subroutine
diff --git a/flang/test/Semantics/elemental02.f90 b/flang/test/Semantics/elemental02.f90
new file mode 100644
index 0000000..7f8fb4a
--- /dev/null
+++ b/flang/test/Semantics/elemental02.f90
@@ -0,0 +1,13 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+subroutine s(a)
+ real a(*)
+ interface
+ elemental function ef(efarg)
+ real, intent(in) :: efarg
+ end
+ end interface
+!ERROR: Whole assumed-size array 'a' may not be used as an argument to an elemental procedure
+ print *, sqrt(a)
+!ERROR: Whole assumed-size array 'a' may not be used as an argument to an elemental procedure
+ print *, ef(a)
+end
diff --git a/flang/test/Semantics/expr-errors06.f90 b/flang/test/Semantics/expr-errors06.f90
index 84872c7..bdcb92c 100644
--- a/flang/test/Semantics/expr-errors06.f90
+++ b/flang/test/Semantics/expr-errors06.f90
@@ -1,7 +1,7 @@
! RUN: %python %S/test_errors.py %s %flang_fc1 -Werror
! Check out-of-range subscripts
subroutine subr(da)
- real a(10), da(2,1)
+ real a(10), da(2,1), empty(1:0,1)
integer, parameter :: n(2) = [1, 2]
integer unknown
!ERROR: DATA statement designator 'a(0_8)' is out of range
@@ -39,4 +39,10 @@ subroutine subr(da)
print *, da(1,0)
!WARNING: Subscript 2 is greater than upper bound 1 for dimension 2 of array
print *, da(1,2)
+ print *, empty([(j,j=1,0)],1) ! ok
+ print *, empty(1:0,1) ! ok
+ print *, empty(:,1) ! ok
+ print *, empty(i:j,k) ! ok
+ !ERROR: Empty array dimension 1 cannot be subscripted as an element or non-empty array section
+ print *, empty(i,1)
end
diff --git a/flang/test/Semantics/kinds06.f90 b/flang/test/Semantics/kinds06.f90
new file mode 100644
index 0000000..f5b488e
--- /dev/null
+++ b/flang/test/Semantics/kinds06.f90
@@ -0,0 +1,4 @@
+!RUN: %python %S/test_errors.py %s %flang_fc1
+!ERROR: 'kind=' argument must be a constant scalar integer whose value is a supported kind for the intrinsic result type
+print *, real(1.,666)
+end
diff --git a/flang/unittests/Runtime/Namelist.cpp b/flang/unittests/Runtime/Namelist.cpp
index f95c5d2..9037fa1 100644
--- a/flang/unittests/Runtime/Namelist.cpp
+++ b/flang/unittests/Runtime/Namelist.cpp
@@ -305,4 +305,33 @@ TEST(NamelistTests, Comma) {
EXPECT_EQ(got, expect);
}
+// Tests REAL-looking input for INTEGER namelist items
+TEST(NamelistTests, RealValueForInt) {
+ OwningPtr<Descriptor> scDesc{
+ MakeArray<TypeCategory::Integer, static_cast<int>(sizeof(int))>(
+ std::vector<int>{}, std::vector<int>{{}})};
+ const NamelistGroup::Item items[]{{"j", *scDesc}};
+ const NamelistGroup group{"nml", 1, items};
+ static char t1[]{"&nml j=123.456/"};
+ StaticDescriptor<1, true> statDesc;
+ Descriptor &internalDesc{statDesc.descriptor()};
+ internalDesc.Establish(TypeCode{CFI_type_char},
+ /*elementBytes=*/std::strlen(t1), t1, 0, nullptr, CFI_attribute_pointer);
+ auto inCookie{IONAME(BeginInternalArrayListInput)(
+ internalDesc, nullptr, 0, __FILE__, __LINE__)};
+ ASSERT_TRUE(IONAME(InputNamelist)(inCookie, group));
+ ASSERT_EQ(IONAME(EndIoStatement)(inCookie), IostatOk)
+ << "namelist real input for integer";
+ char out[16];
+ internalDesc.Establish(TypeCode{CFI_type_char}, /*elementBytes=*/sizeof out,
+ out, 0, nullptr, CFI_attribute_pointer);
+ auto outCookie{IONAME(BeginInternalArrayListOutput)(
+ internalDesc, nullptr, 0, __FILE__, __LINE__)};
+ ASSERT_TRUE(IONAME(OutputNamelist)(outCookie, group));
+ ASSERT_EQ(IONAME(EndIoStatement)(outCookie), IostatOk) << "namelist output";
+ std::string got{out, sizeof out};
+ static const std::string expect{" &NML J= 123/ "};
+ EXPECT_EQ(got, expect);
+}
+
// TODO: Internal NAMELIST error tests
diff --git a/libcxx/.clang-format b/libcxx/.clang-format
index c37b234..84a2afa 100644
--- a/libcxx/.clang-format
+++ b/libcxx/.clang-format
@@ -43,7 +43,6 @@ AttributeMacros: [
'_LIBCPP_NO_SANITIZE',
'_LIBCPP_NO_UNIQUE_ADDRESS',
'_LIBCPP_NOALIAS',
- '_LIBCPP_NODISCARD',
'_LIBCPP_OVERRIDABLE_FUNC_VIS',
'_LIBCPP_STANDALONE_DEBUG',
'_LIBCPP_TEMPLATE_DATA_VIS',
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index ffff811..23d9aa0 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -424,6 +424,7 @@ set(files
__fwd/format.h
__fwd/fstream.h
__fwd/functional.h
+ __fwd/get.h
__fwd/ios.h
__fwd/istream.h
__fwd/mdspan.h
@@ -440,6 +441,7 @@ set(files
__fwd/string_view.h
__fwd/subrange.h
__fwd/tuple.h
+ __fwd/variant.h
__fwd/vector.h
__hash_table
__ios/fpos.h
diff --git a/libcxx/include/__algorithm/adjacent_find.h b/libcxx/include/__algorithm/adjacent_find.h
index 6f15456..f0708eb 100644
--- a/libcxx/include/__algorithm/adjacent_find.h
+++ b/libcxx/include/__algorithm/adjacent_find.h
@@ -26,7 +26,7 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Iter, class _Sent, class _BinaryPredicate>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
__adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) {
if (__first == __last)
return __first;
@@ -40,13 +40,13 @@ __adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) {
}
template <class _ForwardIterator, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) {
return std::__adjacent_find(std::move(__first), std::move(__last), __pred);
}
template <class _ForwardIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
adjacent_find(_ForwardIterator __first, _ForwardIterator __last) {
return std::adjacent_find(std::move(__first), std::move(__last), __equal_to());
}
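
From here on, the libcxx hunks mechanically replace the _LIBCPP_NODISCARD macro (also dropped from AttributeMacros in .clang-format above) with a direct [[__nodiscard__]] attribute, so a discarded result should warn (for example under -Wunused-result) without depending on how that macro was configured. A small sanity check, assuming a libc++ build that includes this change:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> v{1, 1, 2};
  // Expected to produce an "ignoring return value" diagnostic, since
  // adjacent_find is now marked [[__nodiscard__]] directly.
  std::adjacent_find(v.begin(), v.end());
  return 0;
}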
diff --git a/libcxx/include/__algorithm/all_of.h b/libcxx/include/__algorithm/all_of.h
index ec84eea..1fcb74f 100644
--- a/libcxx/include/__algorithm/all_of.h
+++ b/libcxx/include/__algorithm/all_of.h
@@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (!__pred(*__first))
diff --git a/libcxx/include/__algorithm/any_of.h b/libcxx/include/__algorithm/any_of.h
index b5ff778..acb546b 100644
--- a/libcxx/include/__algorithm/any_of.h
+++ b/libcxx/include/__algorithm/any_of.h
@@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
any_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (__pred(*__first))
diff --git a/libcxx/include/__algorithm/binary_search.h b/libcxx/include/__algorithm/binary_search.h
index 6065fc3..79a5ec0 100644
--- a/libcxx/include/__algorithm/binary_search.h
+++ b/libcxx/include/__algorithm/binary_search.h
@@ -22,14 +22,14 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
__first = std::lower_bound<_ForwardIterator, _Tp, __comp_ref_type<_Compare> >(__first, __last, __value, __comp);
return __first != __last && !__comp(__value, *__first);
}
template <class _ForwardIterator, class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
return std::binary_search(__first, __last, __value, __less<>());
}
diff --git a/libcxx/include/__algorithm/count.h b/libcxx/include/__algorithm/count.h
index 1cfe7f6..b3489a4 100644
--- a/libcxx/include/__algorithm/count.h
+++ b/libcxx/include/__algorithm/count.h
@@ -79,7 +79,7 @@ __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __l
}
template <class _InputIterator, class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator>
count(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
__identity __proj;
return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj);
diff --git a/libcxx/include/__algorithm/count_if.h b/libcxx/include/__algorithm/count_if.h
index 2578206..e702388 100644
--- a/libcxx/include/__algorithm/count_if.h
+++ b/libcxx/include/__algorithm/count_if.h
@@ -20,7 +20,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
typename iterator_traits<_InputIterator>::difference_type
count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
typename iterator_traits<_InputIterator>::difference_type __r(0);
diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h
index bfc8f72..23ff064 100644
--- a/libcxx/include/__algorithm/equal.h
+++ b/libcxx/include/__algorithm/equal.h
@@ -35,7 +35,7 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) {
for (; __first1 != __last1; ++__first1, (void)++__first2)
if (!__pred(*__first1, *__first2))
@@ -49,20 +49,20 @@ template <class _Tp,
__enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value &&
!is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
__equal_iter_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&) {
return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
}
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
return std::__equal_iter_impl(
std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred);
}
template <class _InputIterator1, class _InputIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) {
return std::equal(__first1, __last1, __first2, __equal_to());
}
@@ -70,7 +70,7 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first
#if _LIBCPP_STD_VER >= 14
template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred& __comp, _Proj1& __proj1, _Proj2& __proj2) {
while (__first1 != __last1 && __first2 != __last2) {
if (!std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
@@ -90,13 +90,13 @@ template <class _Tp,
__is_identity<_Proj2>::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value &&
__libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
__equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&, _Proj2&) {
return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
}
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
@@ -119,7 +119,7 @@ equal(_InputIterator1 __first1,
}
template <class _InputIterator1, class _InputIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
return std::equal(__first1, __last1, __first2, __last2, __equal_to());
}
diff --git a/libcxx/include/__algorithm/equal_range.h b/libcxx/include/__algorithm/equal_range.h
index 676e436..28c37cd 100644
--- a/libcxx/include/__algorithm/equal_range.h
+++ b/libcxx/include/__algorithm/equal_range.h
@@ -60,7 +60,7 @@ __equal_range(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
}
template <class _ForwardIterator, class _Tp, class _Compare>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
static_assert(__is_callable<_Compare&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable");
static_assert(is_copy_constructible<_ForwardIterator>::value, "Iterator has to be copy constructible");
@@ -73,7 +73,7 @@ equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
}
template <class _ForwardIterator, class _Tp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
return std::equal_range(std::move(__first), std::move(__last), __value, __less<>());
}
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 7f58dbb..3ab4ab8 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -167,7 +167,7 @@ struct __find_segment {
// public API
template <class _InputIterator, class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
__identity __proj;
return std::__rewrap_iter(
diff --git a/libcxx/include/__algorithm/find_end.h b/libcxx/include/__algorithm/find_end.h
index 841e0fd..68a9da7 100644
--- a/libcxx/include/__algorithm/find_end.h
+++ b/libcxx/include/__algorithm/find_end.h
@@ -81,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1>
}
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic(
_ForwardIterator1 __first1,
_ForwardIterator1 __last1,
_ForwardIterator2 __first2,
@@ -102,7 +102,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Fo
}
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end(
_ForwardIterator1 __first1,
_ForwardIterator1 __last1,
_ForwardIterator2 __first2,
@@ -112,7 +112,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Fo
}
template <class _ForwardIterator1, class _ForwardIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
return std::find_end(__first1, __last1, __first2, __last2, __equal_to());
}
diff --git a/libcxx/include/__algorithm/find_first_of.h b/libcxx/include/__algorithm/find_first_of.h
index 6b99f56..4a240f7 100644
--- a/libcxx/include/__algorithm/find_first_of.h
+++ b/libcxx/include/__algorithm/find_first_of.h
@@ -35,7 +35,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_fir
}
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
_ForwardIterator1 __first1,
_ForwardIterator1 __last1,
_ForwardIterator2 __first2,
@@ -45,7 +45,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Fo
}
template <class _ForwardIterator1, class _ForwardIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
return std::__find_first_of_ce(__first1, __last1, __first2, __last2, __equal_to());
}
diff --git a/libcxx/include/__algorithm/find_if.h b/libcxx/include/__algorithm/find_if.h
index 22092d3..fd63bcc 100644
--- a/libcxx/include/__algorithm/find_if.h
+++ b/libcxx/include/__algorithm/find_if.h
@@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
find_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (__pred(*__first))
diff --git a/libcxx/include/__algorithm/find_if_not.h b/libcxx/include/__algorithm/find_if_not.h
index cc20019..b4441b2 100644
--- a/libcxx/include/__algorithm/find_if_not.h
+++ b/libcxx/include/__algorithm/find_if_not.h
@@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
find_if_not(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (!__pred(*__first))
diff --git a/libcxx/include/__algorithm/includes.h b/libcxx/include/__algorithm/includes.h
index 0ad09a9..47b19d4 100644
--- a/libcxx/include/__algorithm/includes.h
+++ b/libcxx/include/__algorithm/includes.h
@@ -47,7 +47,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __includes(
}
template <class _InputIterator1, class _InputIterator2, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
includes(_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
@@ -67,7 +67,7 @@ includes(_InputIterator1 __first1,
}
template <class _InputIterator1, class _InputIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
return std::includes(std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __less<>());
}
diff --git a/libcxx/include/__algorithm/is_heap.h b/libcxx/include/__algorithm/is_heap.h
index c589b80..fa668c1 100644
--- a/libcxx/include/__algorithm/is_heap.h
+++ b/libcxx/include/__algorithm/is_heap.h
@@ -22,13 +22,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _RandomAccessIterator, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp)) == __last;
}
template <class _RandomAccessIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) {
return std::is_heap(__first, __last, __less<>());
}
diff --git a/libcxx/include/__algorithm/is_heap_until.h b/libcxx/include/__algorithm/is_heap_until.h
index a174f24..7444d97 100644
--- a/libcxx/include/__algorithm/is_heap_until.h
+++ b/libcxx/include/__algorithm/is_heap_until.h
@@ -46,13 +46,13 @@ __is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co
}
template <class _RandomAccessIterator, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp));
}
template <class _RandomAccessIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last) {
return std::__is_heap_until(__first, __last, __less<>());
}
diff --git a/libcxx/include/__algorithm/is_partitioned.h b/libcxx/include/__algorithm/is_partitioned.h
index 1f7c8b0..700e452 100644
--- a/libcxx/include/__algorithm/is_partitioned.h
+++ b/libcxx/include/__algorithm/is_partitioned.h
@@ -18,7 +18,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_partitioned(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (!__pred(*__first))
diff --git a/libcxx/include/__algorithm/is_permutation.h b/libcxx/include/__algorithm/is_permutation.h
index 9dcfcf1..b7949a5 100644
--- a/libcxx/include/__algorithm/is_permutation.h
+++ b/libcxx/include/__algorithm/is_permutation.h
@@ -113,7 +113,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation_impl(
// 2+1 iterators, predicate. Not used by range algorithms.
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
_ForwardIterator1 __first1, _Sentinel1 __last1, _ForwardIterator2 __first2, _BinaryPredicate&& __pred) {
// Shorten sequences as much as possible by lopping off any equal prefix.
for (; __first1 != __last1; ++__first1, (void)++__first2) {
@@ -247,7 +247,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
// 2+1 iterators, predicate
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __pred) {
static_assert(__is_callable<_BinaryPredicate&, decltype(*__first1), decltype(*__first2)>::value,
"The comparator has to be callable");
@@ -257,7 +257,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_pe
// 2+1 iterators
template <class _ForwardIterator1, class _ForwardIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2) {
return std::is_permutation(__first1, __last1, __first2, __equal_to());
}
@@ -266,7 +266,7 @@ is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIt
// 2+2 iterators
template <class _ForwardIterator1, class _ForwardIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
return std::__is_permutation<_ClassicAlgPolicy>(
std::move(__first1),
@@ -280,7 +280,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo
// 2+2 iterators, predicate
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
_ForwardIterator1 __first1,
_ForwardIterator1 __last1,
_ForwardIterator2 __first2,
diff --git a/libcxx/include/__algorithm/is_sorted.h b/libcxx/include/__algorithm/is_sorted.h
index 3befb1a..ff61a73 100644
--- a/libcxx/include/__algorithm/is_sorted.h
+++ b/libcxx/include/__algorithm/is_sorted.h
@@ -22,13 +22,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_sorted(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
return std::__is_sorted_until<__comp_ref_type<_Compare> >(__first, __last, __comp) == __last;
}
template <class _ForwardIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
is_sorted(_ForwardIterator __first, _ForwardIterator __last) {
return std::is_sorted(__first, __last, __less<>());
}
diff --git a/libcxx/include/__algorithm/is_sorted_until.h b/libcxx/include/__algorithm/is_sorted_until.h
index 53a49f0..b64fb65 100644
--- a/libcxx/include/__algorithm/is_sorted_until.h
+++ b/libcxx/include/__algorithm/is_sorted_until.h
@@ -35,13 +35,13 @@ __is_sorted_until(_ForwardIterator __first, _ForwardIterator __last, _Compare __
}
template <class _ForwardIterator, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
is_sorted_until(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
return std::__is_sorted_until<__comp_ref_type<_Compare> >(__first, __last, __comp);
}
template <class _ForwardIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
is_sorted_until(_ForwardIterator __first, _ForwardIterator __last) {
return std::is_sorted_until(__first, __last, __less<>());
}
diff --git a/libcxx/include/__algorithm/lexicographical_compare.h b/libcxx/include/__algorithm/lexicographical_compare.h
index df23e6a..8ea7c17 100644
--- a/libcxx/include/__algorithm/lexicographical_compare.h
+++ b/libcxx/include/__algorithm/lexicographical_compare.h
@@ -98,7 +98,7 @@ __lexicographical_compare(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __las
#endif // _LIBCPP_STD_VER >= 14
template <class _InputIterator1, class _InputIterator2, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare(
_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
@@ -116,7 +116,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo
}
template <class _InputIterator1, class _InputIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare(
_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
return std::lexicographical_compare(__first1, __last1, __first2, __last2, __less<>());
}
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index d18ab83..54a64be 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -28,7 +28,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Iter, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting(
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting(
_Iter __first,
const _Type& __value,
typename iterator_traits<_Iter>::difference_type __len,
@@ -58,7 +58,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lo
// whereas the one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of
// comparisons.
template <class _AlgPolicy, class _ForwardIterator, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
__lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
// step = 0, ensuring we can always short-circuit when distance is 1 later on
if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value))
@@ -84,14 +84,14 @@ __lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __va
}
template <class _AlgPolicy, class _ForwardIterator, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
__lower_bound(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
}
template <class _ForwardIterator, class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
static_assert(__is_callable<_Compare&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable");
auto __proj = std::__identity();
@@ -99,7 +99,7 @@ lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
}
template <class _ForwardIterator, class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
return std::lower_bound(__first, __last, __value, __less<>());
}
diff --git a/libcxx/include/__algorithm/max.h b/libcxx/include/__algorithm/max.h
index d4c99f6..1673e6b 100644
--- a/libcxx/include/__algorithm/max.h
+++ b/libcxx/include/__algorithm/max.h
@@ -25,13 +25,13 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) {
return __comp(__a, __b) ? __b : __a;
}
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) {
return std::max(__a, __b, __less<>());
}
@@ -39,13 +39,13 @@ max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b)
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp
max(initializer_list<_Tp> __t, _Compare __comp) {
return *std::__max_element<__comp_ref_type<_Compare> >(__t.begin(), __t.end(), __comp);
}
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp max(initializer_list<_Tp> __t) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp max(initializer_list<_Tp> __t) {
return *std::max_element(__t.begin(), __t.end(), __less<>());
}
diff --git a/libcxx/include/__algorithm/max_element.h b/libcxx/include/__algorithm/max_element.h
index 3e58c40..929f337 100644
--- a/libcxx/include/__algorithm/max_element.h
+++ b/libcxx/include/__algorithm/max_element.h
@@ -36,7 +36,7 @@ __max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp
}
template <class _ForwardIterator, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator
max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
static_assert(
__is_callable<_Compare&, decltype(*__first), decltype(*__first)>::value, "The comparator has to be callable");
@@ -44,7 +44,7 @@ max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
}
template <class _ForwardIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator
max_element(_ForwardIterator __first, _ForwardIterator __last) {
return std::max_element(__first, __last, __less<>());
}
diff --git a/libcxx/include/__algorithm/min.h b/libcxx/include/__algorithm/min.h
index 1bafad8..660e0b2 100644
--- a/libcxx/include/__algorithm/min.h
+++ b/libcxx/include/__algorithm/min.h
@@ -25,13 +25,13 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) {
return __comp(__b, __a) ? __b : __a;
}
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) {
return std::min(__a, __b, __less<>());
}
@@ -39,13 +39,13 @@ min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b)
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp
min(initializer_list<_Tp> __t, _Compare __comp) {
return *std::__min_element<__comp_ref_type<_Compare> >(__t.begin(), __t.end(), __comp);
}
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp min(initializer_list<_Tp> __t) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp min(initializer_list<_Tp> __t) {
return *std::min_element(__t.begin(), __t.end(), __less<>());
}
diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h
index 9a360f9..f40b24a0 100644
--- a/libcxx/include/__algorithm/min_element.h
+++ b/libcxx/include/__algorithm/min_element.h
@@ -48,7 +48,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter __min_element(_Iter __
}
template <class _ForwardIterator, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator
min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
static_assert(
__has_forward_iterator_category<_ForwardIterator>::value, "std::min_element requires a ForwardIterator");
@@ -59,7 +59,7 @@ min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
}
template <class _ForwardIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator
min_element(_ForwardIterator __first, _ForwardIterator __last) {
return std::min_element(__first, __last, __less<>());
}
diff --git a/libcxx/include/__algorithm/minmax.h b/libcxx/include/__algorithm/minmax.h
index bb7a379..de0bec0 100644
--- a/libcxx/include/__algorithm/minmax.h
+++ b/libcxx/include/__algorithm/minmax.h
@@ -24,13 +24,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<const _Tp&, const _Tp&>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<const _Tp&, const _Tp&>
minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) {
return __comp(__b, __a) ? pair<const _Tp&, const _Tp&>(__b, __a) : pair<const _Tp&, const _Tp&>(__a, __b);
}
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<const _Tp&, const _Tp&>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<const _Tp&, const _Tp&>
minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) {
return std::minmax(__a, __b, __less<>());
}
@@ -38,7 +38,7 @@ minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp>
minmax(initializer_list<_Tp> __t, _Compare __comp) {
static_assert(__is_callable<_Compare&, _Tp, _Tp>::value, "The comparator has to be callable");
__identity __proj;
@@ -47,7 +47,7 @@ minmax(initializer_list<_Tp> __t, _Compare __comp) {
}
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp>
minmax(initializer_list<_Tp> __t) {
return std::minmax(__t, __less<>());
}
diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index 23929c9..47e3a68 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -79,7 +79,7 @@ __minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj)
}
template <class _ForwardIterator, class _Compare>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator>
minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
static_assert(
__has_forward_iterator_category<_ForwardIterator>::value, "std::minmax_element requires a ForwardIterator");
@@ -90,7 +90,7 @@ minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __com
}
template <class _ForwardIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator>
minmax_element(_ForwardIterator __first, _ForwardIterator __last) {
return std::minmax_element(__first, __last, __less<>());
}
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 632bec0..0fae7f6 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -37,7 +37,7 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Iter1, class _Sent1, class _Iter2, class _Pred, class _Proj1, class _Proj2>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2>
__mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
while (__first1 != __last1) {
if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
@@ -49,7 +49,7 @@ __mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred,
}
template <class _Iter1, class _Sent1, class _Iter2, class _Pred, class _Proj1, class _Proj2>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2>
__mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
}
@@ -57,7 +57,7 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
#if _LIBCPP_VECTORIZE_ALGORITHMS
template <class _Iter>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
__mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
using __value_type = __iter_value_type<_Iter>;
constexpr size_t __unroll_count = 4;
@@ -124,7 +124,7 @@ template <class _Tp,
__enable_if_t<is_integral<_Tp>::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> &&
__is_identity<_Proj1>::value && __is_identity<_Proj2>::value,
int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred&, _Proj1&, _Proj2&) {
return std::__mismatch_vectorized(__first1, __last1, __first2);
}
@@ -137,7 +137,7 @@ template <class _Tp,
__is_identity<_Proj1>::value && __is_identity<_Proj2>::value &&
__can_map_to_integer_v<_Tp> && __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value,
int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
if (__libcpp_is_constant_evaluated()) {
return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
@@ -150,7 +150,7 @@ __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __
#endif // _LIBCPP_VECTORIZE_ALGORITHMS
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
__identity __proj;
auto __res = std::__mismatch(
@@ -159,14 +159,14 @@ mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __fi
}
template <class _InputIterator1, class _InputIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) {
return std::mismatch(__first1, __last1, __first2, __equal_to());
}
#if _LIBCPP_STD_VER >= 14
template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch(
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch(
_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
while (__first1 != __last1 && __first2 != __last2) {
if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
@@ -178,14 +178,14 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter
}
template <class _Tp, class _Pred, class _Proj1, class _Proj2>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
auto __len = std::min(__last1 - __first1, __last2 - __first2);
return std::__mismatch(__first1, __first1 + __len, __first2, __pred, __proj1, __proj2);
}
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
@@ -204,7 +204,7 @@ mismatch(_InputIterator1 __first1,
}
template <class _InputIterator1, class _InputIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
return std::mismatch(__first1, __last1, __first2, __last2, __equal_to());
}
diff --git a/libcxx/include/__algorithm/none_of.h b/libcxx/include/__algorithm/none_of.h
index 50841ba..e6bd197 100644
--- a/libcxx/include/__algorithm/none_of.h
+++ b/libcxx/include/__algorithm/none_of.h
@@ -19,7 +19,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
none_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
for (; __first != __last; ++__first)
if (__pred(*__first))
diff --git a/libcxx/include/__algorithm/pstl.h b/libcxx/include/__algorithm/pstl.h
index 0bb052b..71e7f28 100644
--- a/libcxx/include/__algorithm/pstl.h
+++ b/libcxx/include/__algorithm/pstl.h
@@ -352,7 +352,7 @@ template <class _ExecutionPolicy,
class _Predicate,
class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>,
enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
is_partitioned(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
_LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "is_partitioned requires ForwardIterators");
using _Implementation = __pstl::__dispatch<__pstl::__is_partitioned, __pstl::__current_configuration, _RawPolicy>;
diff --git a/libcxx/include/__algorithm/remove.h b/libcxx/include/__algorithm/remove.h
index fd01c23..b2d7023 100644
--- a/libcxx/include/__algorithm/remove.h
+++ b/libcxx/include/__algorithm/remove.h
@@ -24,7 +24,7 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Tp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
remove(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
__first = std::find(__first, __last, __value);
if (__first != __last) {
diff --git a/libcxx/include/__algorithm/remove_if.h b/libcxx/include/__algorithm/remove_if.h
index b14f3c0e..56fd745 100644
--- a/libcxx/include/__algorithm/remove_if.h
+++ b/libcxx/include/__algorithm/remove_if.h
@@ -23,7 +23,7 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Predicate>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
remove_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
__first = std::find_if<_ForwardIterator, _Predicate&>(__first, __last, __pred);
if (__first != __last) {
diff --git a/libcxx/include/__algorithm/search.h b/libcxx/include/__algorithm/search.h
index 7316e5e..24dec22 100644
--- a/libcxx/include/__algorithm/search.h
+++ b/libcxx/include/__algorithm/search.h
@@ -160,7 +160,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> __searc
}
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
search(_ForwardIterator1 __first1,
_ForwardIterator1 __last1,
_ForwardIterator2 __first2,
@@ -173,7 +173,7 @@ search(_ForwardIterator1 __first1,
}
template <class _ForwardIterator1, class _ForwardIterator2>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
return std::search(__first1, __last1, __first2, __last2, __equal_to());
}
diff --git a/libcxx/include/__algorithm/search_n.h b/libcxx/include/__algorithm/search_n.h
index f980638..4019dfb 100644
--- a/libcxx/include/__algorithm/search_n.h
+++ b/libcxx/include/__algorithm/search_n.h
@@ -136,7 +136,7 @@ __search_n_impl(_Iter1 __first, _Sent1 __last, _DiffT __count, const _Type& __va
}
template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n(
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n(
_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) {
static_assert(
__is_callable<_BinaryPredicate&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable");
@@ -145,7 +145,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Fo
}
template <class _ForwardIterator, class _Size, class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value) {
return std::search_n(__first, __last, std::__convert_to_integral(__count), __value, __equal_to());
}
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index bb0d86c..2335e50 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -84,7 +84,7 @@ template <class _AlgPolicy,
class _InForwardIter2,
class _Sent2,
class _OutIter>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
__set_intersection(
_InForwardIter1 __first1,
@@ -129,7 +129,7 @@ template <class _AlgPolicy,
class _InInputIter2,
class _Sent2,
class _OutIter>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
__set_intersection(
_InInputIter1 __first1,
@@ -160,7 +160,7 @@ __set_intersection(
}
template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
__set_intersection(
_InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h
index 549197b..56518da 100644
--- a/libcxx/include/__algorithm/simd_utils.h
+++ b/libcxx/include/__algorithm/simd_utils.h
@@ -110,19 +110,19 @@ using __simd_vector_underlying_type_t = decltype(std::__simd_vector_underlying_t
// This isn't inlined without always_inline when loading chars.
template <class _VecT, class _Iter>
-_LIBCPP_NODISCARD _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept {
+[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept {
return [=]<size_t... _Indices>(index_sequence<_Indices...>) _LIBCPP_ALWAYS_INLINE noexcept {
return _VecT{__iter[_Indices]...};
}(make_index_sequence<__simd_vector_size_v<_VecT>>{});
}
template <class _Tp, size_t _Np>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
}
template <class _Tp, size_t _Np>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
using __mask_vec = __simd_vector<bool, _Np>;
// This has MSan disabled due to https://github.com/llvm/llvm-project/issues/85876
@@ -151,7 +151,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_T
}
template <class _Tp, size_t _Np>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept {
return std::__find_first_set(~__vec);
}
diff --git a/libcxx/include/__algorithm/unique.h b/libcxx/include/__algorithm/unique.h
index d597014..1f0c4ff 100644
--- a/libcxx/include/__algorithm/unique.h
+++ b/libcxx/include/__algorithm/unique.h
@@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// unique
template <class _AlgPolicy, class _Iter, class _Sent, class _BinaryPredicate>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 std::pair<_Iter, _Iter>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 std::pair<_Iter, _Iter>
__unique(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) {
__first = std::__adjacent_find(__first, __last, __pred);
if (__first != __last) {
@@ -46,13 +46,13 @@ __unique(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) {
}
template <class _ForwardIterator, class _BinaryPredicate>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
unique(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) {
return std::__unique<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __pred).first;
}
template <class _ForwardIterator>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
unique(_ForwardIterator __first, _ForwardIterator __last) {
return std::unique(__first, __last, __equal_to());
}
diff --git a/libcxx/include/__algorithm/upper_bound.h b/libcxx/include/__algorithm/upper_bound.h
index 102447e..e8be0ef 100644
--- a/libcxx/include/__algorithm/upper_bound.h
+++ b/libcxx/include/__algorithm/upper_bound.h
@@ -49,7 +49,7 @@ __upper_bound(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
}
template <class _ForwardIterator, class _Tp, class _Compare>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
static_assert(__is_callable<_Compare&, const _Tp&, decltype(*__first)>::value, "The comparator has to be callable");
static_assert(is_copy_constructible<_ForwardIterator>::value, "Iterator has to be copy constructible");
@@ -58,7 +58,7 @@ upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
}
template <class _ForwardIterator, class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
return std::upper_bound(std::move(__first), std::move(__last), __value, __less<>());
}
diff --git a/libcxx/include/__bit/bit_cast.h b/libcxx/include/__bit/bit_cast.h
index cd04567..7350250 100644
--- a/libcxx/include/__bit/bit_cast.h
+++ b/libcxx/include/__bit/bit_cast.h
@@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#ifndef _LIBCPP_CXX03_LANG
template <class _ToType, class _FromType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr _ToType __bit_cast(const _FromType& __from) noexcept {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr _ToType __bit_cast(const _FromType& __from) noexcept {
return __builtin_bit_cast(_ToType, __from);
}
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index 998a0b4..bb09e8e 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -27,15 +27,15 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
return __builtin_clz(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
return __builtin_clzl(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
return __builtin_clzll(__x);
}
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 9e92021..2f75711 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -26,20 +26,20 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
return __builtin_ctz(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
return __builtin_ctzl(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
return __builtin_ctzll(__x);
}
template <class _Tp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
#if __has_builtin(__builtin_ctzg)
return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
#else // __has_builtin(__builtin_ctzg)
diff --git a/libcxx/include/__chrono/leap_second.h b/libcxx/include/__chrono/leap_second.h
index be3ab423..b01d9fb 100644
--- a/libcxx/include/__chrono/leap_second.h
+++ b/libcxx/include/__chrono/leap_second.h
@@ -43,9 +43,9 @@ public:
_LIBCPP_HIDE_FROM_ABI leap_second(const leap_second&) = default;
_LIBCPP_HIDE_FROM_ABI leap_second& operator=(const leap_second&) = default;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr sys_seconds date() const noexcept { return __date_; }
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sys_seconds date() const noexcept { return __date_; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr seconds value() const noexcept { return __value_; }
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr seconds value() const noexcept { return __value_; }
private:
sys_seconds __date_;
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 1c0b7c0..9f3bab3 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -1116,15 +1116,6 @@ typedef __char32_t char32_t;
# define _LIBCPP_USING_IF_EXISTS
# endif
-# if __has_cpp_attribute(__nodiscard__)
-# define _LIBCPP_NODISCARD [[__nodiscard__]]
-# else
-// We can't use GCC's [[gnu::warn_unused_result]] and
-// __attribute__((warn_unused_result)), because GCC does not silence them via
-// (void) cast.
-# define _LIBCPP_NODISCARD
-# endif
-
# if __has_attribute(__no_destroy__)
# define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__))
# else
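The fallback being removed above is what the deleted comment explains: GCC's __attribute__((warn_unused_result)) could not stand in for the attribute because GCC keeps warning even when the result is cast to (void), so the macro had to expand to nothing. A minimal sketch of that difference, using hypothetical function names that are not part of the patch:

[[nodiscard]] int checked() { return 1; }                       // a (void) cast silences the warning
__attribute__((warn_unused_result)) int legacy() { return 1; }  // GCC warns even through a (void) cast

int main() {
  (void)checked(); // no -Wunused-result diagnostic
  (void)legacy();  // GCC still warns here, which is why the fallback macro expanded to nothing
}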
diff --git a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h
index eef1fc0..f0d3968 100644
--- a/libcxx/include/__filesystem/path.h
+++ b/libcxx/include/__filesystem/path.h
@@ -812,7 +812,7 @@ public:
_LIBCPP_HIDE_FROM_ABI path extension() const { return string_type(__extension()); }
// query
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); }
_LIBCPP_HIDE_FROM_ABI bool has_root_name() const { return !__root_name().empty(); }
_LIBCPP_HIDE_FROM_ABI bool has_root_directory() const { return !__root_directory().empty(); }
diff --git a/libcxx/include/__functional/identity.h b/libcxx/include/__functional/identity.h
index 8468de3..1b1c6cf 100644
--- a/libcxx/include/__functional/identity.h
+++ b/libcxx/include/__functional/identity.h
@@ -26,7 +26,7 @@ struct __is_identity : false_type {};
struct __identity {
template <class _Tp>
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& operator()(_Tp&& __t) const _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& operator()(_Tp&& __t) const _NOEXCEPT {
return std::forward<_Tp>(__t);
}
diff --git a/libcxx/include/__fwd/get.h b/libcxx/include/__fwd/get.h
new file mode 100644
index 0000000..6121ed0
--- /dev/null
+++ b/libcxx/include/__fwd/get.h
@@ -0,0 +1,24 @@
+//===---------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FWD_GET_H
+#define _LIBCPP___FWD_GET_H
+
+#include <__config>
+#include <__fwd/array.h>
+#include <__fwd/complex.h>
+#include <__fwd/pair.h>
+#include <__fwd/subrange.h>
+#include <__fwd/tuple.h>
+#include <__fwd/variant.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#endif // _LIBCPP___FWD_GET_H
diff --git a/libcxx/include/__fwd/variant.h b/libcxx/include/__fwd/variant.h
new file mode 100644
index 0000000..71c792f
--- /dev/null
+++ b/libcxx/include/__fwd/variant.h
@@ -0,0 +1,77 @@
+//===---------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FWD_VARIANT_H
+#define _LIBCPP___FWD_VARIANT_H
+
+#include <__config>
+#include <__cstddef/size_t.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 17
+
+template <class... _Types>
+class _LIBCPP_TEMPLATE_VIS variant;
+
+template <class _Tp>
+struct _LIBCPP_TEMPLATE_VIS variant_size;
+
+template <class _Tp>
+inline constexpr size_t variant_size_v = variant_size<_Tp>::value;
+
+template <size_t _Ip, class _Tp>
+struct _LIBCPP_TEMPLATE_VIS variant_alternative;
+
+template <size_t _Ip, class _Tp>
+using variant_alternative_t = typename variant_alternative<_Ip, _Tp>::type;
+
+inline constexpr size_t variant_npos = static_cast<size_t>(-1);
+
+template <size_t _Ip, class... _Types>
+_LIBCPP_HIDE_FROM_ABI
+_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr variant_alternative_t<_Ip, variant<_Types...>>&
+get(variant<_Types...>&);
+
+template <size_t _Ip, class... _Types>
+_LIBCPP_HIDE_FROM_ABI
+_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr variant_alternative_t<_Ip, variant<_Types...>>&&
+get(variant<_Types...>&&);
+
+template <size_t _Ip, class... _Types>
+_LIBCPP_HIDE_FROM_ABI
+_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const variant_alternative_t<_Ip, variant<_Types...>>&
+get(const variant<_Types...>&);
+
+template <size_t _Ip, class... _Types>
+_LIBCPP_HIDE_FROM_ABI
+_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const variant_alternative_t<_Ip, variant<_Types...>>&&
+get(const variant<_Types...>&&);
+
+template <class _Tp, class... _Types>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr _Tp& get(variant<_Types...>&);
+
+template <class _Tp, class... _Types>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr _Tp&& get(variant<_Types...>&&);
+
+template <class _Tp, class... _Types>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const _Tp& get(const variant<_Types...>&);
+
+template <class _Tp, class... _Types>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const _Tp&&
+get(const variant<_Types...>&&);
+
+#endif // _LIBCPP_STD_VER >= 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FWD_VARIANT_H
diff --git a/libcxx/include/__math/abs.h b/libcxx/include/__math/abs.h
index ab82a28..fc3bf3a 100644
--- a/libcxx/include/__math/abs.h
+++ b/libcxx/include/__math/abs.h
@@ -23,19 +23,19 @@ namespace __math {
// fabs
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fabs(float __x) _NOEXCEPT { return __builtin_fabsf(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fabs(float __x) _NOEXCEPT { return __builtin_fabsf(__x); }
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fabs(double __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fabs(double __x) _NOEXCEPT {
return __builtin_fabs(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fabs(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fabs(long double __x) _NOEXCEPT {
return __builtin_fabsl(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double fabs(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double fabs(_A1 __x) _NOEXCEPT {
return __builtin_fabs((double)__x);
}
diff --git a/libcxx/include/__math/copysign.h b/libcxx/include/__math/copysign.h
index 2c3b0dd..c3ca6a3 100644
--- a/libcxx/include/__math/copysign.h
+++ b/libcxx/include/__math/copysign.h
@@ -24,16 +24,16 @@ namespace __math {
// copysign
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float copysign(float __x, float __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float copysign(float __x, float __y) _NOEXCEPT {
return ::__builtin_copysignf(__x, __y);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double copysign(long double __x, long double __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double copysign(long double __x, long double __y) _NOEXCEPT {
return ::__builtin_copysignl(__x, __y);
}
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type copysign(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type copysign(_A1 __x, _A2 __y) _NOEXCEPT {
return ::__builtin_copysign(__x, __y);
}
diff --git a/libcxx/include/__math/min_max.h b/libcxx/include/__math/min_max.h
index 27997b4..db900c8 100644
--- a/libcxx/include/__math/min_max.h
+++ b/libcxx/include/__math/min_max.h
@@ -25,21 +25,21 @@ namespace __math {
// fmax
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fmax(float __x, float __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fmax(float __x, float __y) _NOEXCEPT {
return __builtin_fmaxf(__x, __y);
}
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fmax(double __x, double __y) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fmax(double __x, double __y) _NOEXCEPT {
return __builtin_fmax(__x, __y);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fmax(long double __x, long double __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fmax(long double __x, long double __y) _NOEXCEPT {
return __builtin_fmaxl(__x, __y);
}
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmax(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmax(_A1 __x, _A2 __y) _NOEXCEPT {
using __result_type = typename __promote<_A1, _A2>::type;
static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), "");
return __math::fmax((__result_type)__x, (__result_type)__y);
@@ -47,21 +47,21 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::typ
// fmin
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fmin(float __x, float __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fmin(float __x, float __y) _NOEXCEPT {
return __builtin_fminf(__x, __y);
}
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fmin(double __x, double __y) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fmin(double __x, double __y) _NOEXCEPT {
return __builtin_fmin(__x, __y);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fmin(long double __x, long double __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fmin(long double __x, long double __y) _NOEXCEPT {
return __builtin_fminl(__x, __y);
}
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmin(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmin(_A1 __x, _A2 __y) _NOEXCEPT {
using __result_type = typename __promote<_A1, _A2>::type;
static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), "");
return __math::fmin((__result_type)__x, (__result_type)__y);
diff --git a/libcxx/include/__math/roots.h b/libcxx/include/__math/roots.h
index 359fd74..cef376f 100644
--- a/libcxx/include/__math/roots.h
+++ b/libcxx/include/__math/roots.h
@@ -39,19 +39,19 @@ inline _LIBCPP_HIDE_FROM_ABI double sqrt(_A1 __x) _NOEXCEPT {
// cbrt
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float cbrt(float __x) _NOEXCEPT { return __builtin_cbrtf(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float cbrt(float __x) _NOEXCEPT { return __builtin_cbrtf(__x); }
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double cbrt(double __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double cbrt(double __x) _NOEXCEPT {
return __builtin_cbrt(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double cbrt(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double cbrt(long double __x) _NOEXCEPT {
return __builtin_cbrtl(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double cbrt(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double cbrt(_A1 __x) _NOEXCEPT {
return __builtin_cbrt((double)__x);
}
diff --git a/libcxx/include/__math/rounding_functions.h b/libcxx/include/__math/rounding_functions.h
index f7246ba..474f585 100644
--- a/libcxx/include/__math/rounding_functions.h
+++ b/libcxx/include/__math/rounding_functions.h
@@ -26,37 +26,37 @@ namespace __math {
// ceil
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float ceil(float __x) _NOEXCEPT { return __builtin_ceilf(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float ceil(float __x) _NOEXCEPT { return __builtin_ceilf(__x); }
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double ceil(double __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double ceil(double __x) _NOEXCEPT {
return __builtin_ceil(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double ceil(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double ceil(long double __x) _NOEXCEPT {
return __builtin_ceill(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double ceil(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double ceil(_A1 __x) _NOEXCEPT {
return __builtin_ceil((double)__x);
}
// floor
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float floor(float __x) _NOEXCEPT { return __builtin_floorf(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float floor(float __x) _NOEXCEPT { return __builtin_floorf(__x); }
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double floor(double __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double floor(double __x) _NOEXCEPT {
return __builtin_floor(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double floor(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double floor(long double __x) _NOEXCEPT {
return __builtin_floorl(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double floor(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double floor(_A1 __x) _NOEXCEPT {
return __builtin_floor((double)__x);
}
@@ -126,21 +126,21 @@ inline _LIBCPP_HIDE_FROM_ABI long lround(_A1 __x) _NOEXCEPT {
// nearbyint
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float nearbyint(float __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float nearbyint(float __x) _NOEXCEPT {
return __builtin_nearbyintf(__x);
}
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double nearbyint(double __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double nearbyint(double __x) _NOEXCEPT {
return __builtin_nearbyint(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double nearbyint(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double nearbyint(long double __x) _NOEXCEPT {
return __builtin_nearbyintl(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double nearbyint(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double nearbyint(_A1 __x) _NOEXCEPT {
return __builtin_nearbyint((double)__x);
}
@@ -186,55 +186,55 @@ inline _LIBCPP_HIDE_FROM_ABI double nexttoward(_A1 __x, long double __y) _NOEXCE
// rint
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float rint(float __x) _NOEXCEPT { return __builtin_rintf(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float rint(float __x) _NOEXCEPT { return __builtin_rintf(__x); }
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double rint(double __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double rint(double __x) _NOEXCEPT {
return __builtin_rint(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double rint(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double rint(long double __x) _NOEXCEPT {
return __builtin_rintl(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double rint(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double rint(_A1 __x) _NOEXCEPT {
return __builtin_rint((double)__x);
}
// round
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float round(float __x) _NOEXCEPT { return __builtin_round(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float round(float __x) _NOEXCEPT { return __builtin_round(__x); }
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double round(double __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double round(double __x) _NOEXCEPT {
return __builtin_round(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double round(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double round(long double __x) _NOEXCEPT {
return __builtin_roundl(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double round(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double round(_A1 __x) _NOEXCEPT {
return __builtin_round((double)__x);
}
// trunc
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float trunc(float __x) _NOEXCEPT { return __builtin_trunc(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float trunc(float __x) _NOEXCEPT { return __builtin_trunc(__x); }
template <class = int>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double trunc(double __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double trunc(double __x) _NOEXCEPT {
return __builtin_trunc(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double trunc(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double trunc(long double __x) _NOEXCEPT {
return __builtin_truncl(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double trunc(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double trunc(_A1 __x) _NOEXCEPT {
return __builtin_trunc((double)__x);
}
diff --git a/libcxx/include/__math/traits.h b/libcxx/include/__math/traits.h
index 3d4f14f..0c96f76 100644
--- a/libcxx/include/__math/traits.h
+++ b/libcxx/include/__math/traits.h
@@ -12,7 +12,6 @@
#include <__config>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_arithmetic.h>
-#include <__type_traits/is_floating_point.h>
#include <__type_traits/is_integral.h>
#include <__type_traits/is_signed.h>
#include <__type_traits/promote.h>
@@ -34,52 +33,65 @@ namespace __math {
# define _LIBCPP_SIGNBIT_CONSTEXPR
#endif
-template <class _A1, __enable_if_t<is_floating_point<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT {
+// The universal C runtime (UCRT) in the WinSDK provides floating point overloads
+// for std::signbit(). By defining our overloads as templates, we can work around
+// this issue as templates are less preferred than non-template functions.
+template <class = void>
+[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT {
+ return __builtin_signbit(__x);
+}
+
+template <class = void>
+[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT {
+ return __builtin_signbit(__x);
+}
+
+template <class = void>
+[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT {
return __builtin_signbit(__x);
}
template <class _A1, __enable_if_t<is_integral<_A1>::value && is_signed<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT {
return __x < 0;
}
template <class _A1, __enable_if_t<is_integral<_A1>::value && !is_signed<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1) _NOEXCEPT {
return false;
}
// isfinite
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1) _NOEXCEPT {
return true;
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(float __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(float __x) _NOEXCEPT {
return __builtin_isfinite(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(double __x) _NOEXCEPT {
return __builtin_isfinite(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(long double __x) _NOEXCEPT {
return __builtin_isfinite(__x);
}
// isinf
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1) _NOEXCEPT {
return false;
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(float __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(float __x) _NOEXCEPT {
return __builtin_isinf(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI
#ifdef _LIBCPP_PREFERRED_OVERLOAD
_LIBCPP_PREFERRED_OVERLOAD
#endif
@@ -88,22 +100,22 @@ _LIBCPP_PREFERRED_OVERLOAD
return __builtin_isinf(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(long double __x) _NOEXCEPT {
return __builtin_isinf(__x);
}
// isnan
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1) _NOEXCEPT {
return false;
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(float __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(float __x) _NOEXCEPT {
return __builtin_isnan(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI
#ifdef _LIBCPP_PREFERRED_OVERLOAD
_LIBCPP_PREFERRED_OVERLOAD
#endif
@@ -112,33 +124,33 @@ _LIBCPP_PREFERRED_OVERLOAD
return __builtin_isnan(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(long double __x) _NOEXCEPT {
return __builtin_isnan(__x);
}
// isnormal
template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) _NOEXCEPT {
return __x != 0;
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(float __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(float __x) _NOEXCEPT {
return __builtin_isnormal(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(double __x) _NOEXCEPT {
return __builtin_isnormal(__x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(long double __x) _NOEXCEPT {
return __builtin_isnormal(__x);
}
// isgreater
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) _NOEXCEPT {
using type = typename __promote<_A1, _A2>::type;
return __builtin_isgreater((type)__x, (type)__y);
}
@@ -146,7 +158,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y)
// isgreaterequal
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) _NOEXCEPT {
using type = typename __promote<_A1, _A2>::type;
return __builtin_isgreaterequal((type)__x, (type)__y);
}
@@ -154,7 +166,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2
// isless
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NOEXCEPT {
using type = typename __promote<_A1, _A2>::type;
return __builtin_isless((type)__x, (type)__y);
}
@@ -162,7 +174,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NO
// islessequal
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) _NOEXCEPT {
using type = typename __promote<_A1, _A2>::type;
return __builtin_islessequal((type)__x, (type)__y);
}
@@ -170,7 +182,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y
// islessgreater
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 __y) _NOEXCEPT {
using type = typename __promote<_A1, _A2>::type;
return __builtin_islessgreater((type)__x, (type)__y);
}
@@ -178,7 +190,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 _
// isunordered
template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) _NOEXCEPT {
using type = typename __promote<_A1, _A2>::type;
return __builtin_isunordered((type)__x, (type)__y);
}
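The new comment added to signbit() above relies on a standard overload-resolution rule: when a non-template function and a function template specialization are otherwise equally good matches, the non-template is preferred, so the UCRT's plain overloads win over libc++'s templated ones. A minimal sketch of that rule, with hypothetical names rather than the actual UCRT or libc++ declarations:

#include <iostream>

bool sign_like(double) { return true; }   // stands in for a non-template (UCRT-style) overload

template <class = void>
bool sign_like(double) { return false; }  // stands in for the templated libc++-style overload

int main() {
  // Both candidates match exactly; overload resolution selects the non-template one.
  std::cout << sign_like(1.0) << '\n'; // prints 1
}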
diff --git a/libcxx/include/__memory/allocate_at_least.h b/libcxx/include/__memory/allocate_at_least.h
index df73d9a..a10e4fba 100644
--- a/libcxx/include/__memory/allocate_at_least.h
+++ b/libcxx/include/__memory/allocate_at_least.h
@@ -35,7 +35,7 @@ struct __allocation_result {
};
template <class _Alloc>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
_LIBCPP_CONSTEXPR __allocation_result<typename allocator_traits<_Alloc>::pointer>
__allocate_at_least(_Alloc& __alloc, size_t __n) {
return {__alloc.allocate(__n), __n};
diff --git a/libcxx/include/__memory/allocator.h b/libcxx/include/__memory/allocator.h
index 6a9eed9..cd146da 100644
--- a/libcxx/include/__memory/allocator.h
+++ b/libcxx/include/__memory/allocator.h
@@ -93,7 +93,7 @@ public:
template <class _Up>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator(const allocator<_Up>&) _NOEXCEPT {}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* allocate(size_t __n) {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* allocate(size_t __n) {
static_assert(sizeof(_Tp) >= 0, "cannot allocate memory for an incomplete type");
if (__n > allocator_traits<allocator>::max_size(*this))
__throw_bad_array_new_length();
@@ -138,7 +138,7 @@ public:
return std::addressof(__x);
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 _Tp* allocate(size_t __n, const void*) {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 _Tp* allocate(size_t __n, const void*) {
return allocate(__n);
}
diff --git a/libcxx/include/__memory/allocator_traits.h b/libcxx/include/__memory/allocator_traits.h
index c5fcc89..082c36d 100644
--- a/libcxx/include/__memory/allocator_traits.h
+++ b/libcxx/include/__memory/allocator_traits.h
@@ -275,13 +275,13 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits {
};
#endif // _LIBCPP_CXX03_LANG
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer
allocate(allocator_type& __a, size_type __n) {
return __a.allocate(__n);
}
template <class _Ap = _Alloc, __enable_if_t<__has_allocate_hint<_Ap, size_type, const_void_pointer>::value, int> = 0>
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer
allocate(allocator_type& __a, size_type __n, const_void_pointer __hint) {
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
return __a.allocate(__n, __hint);
@@ -290,7 +290,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits {
template <class _Ap = _Alloc,
class = void,
__enable_if_t<!__has_allocate_hint<_Ap, size_type, const_void_pointer>::value, int> = 0>
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer
allocate(allocator_type& __a, size_type __n, const_void_pointer) {
return __a.allocate(__n);
}
diff --git a/libcxx/include/__memory/assume_aligned.h b/libcxx/include/__memory/assume_aligned.h
index 526eb33..c7ba2a9 100644
--- a/libcxx/include/__memory/assume_aligned.h
+++ b/libcxx/include/__memory/assume_aligned.h
@@ -23,7 +23,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <size_t _Np, class _Tp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __assume_aligned(_Tp* __ptr) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __assume_aligned(_Tp* __ptr) {
static_assert(_Np != 0 && (_Np & (_Np - 1)) == 0, "std::assume_aligned<N>(p) requires N to be a power of two");
if (__libcpp_is_constant_evaluated()) {
diff --git a/libcxx/include/__memory/temporary_buffer.h b/libcxx/include/__memory/temporary_buffer.h
index 88799ca..633c9dc 100644
--- a/libcxx/include/__memory/temporary_buffer.h
+++ b/libcxx/include/__memory/temporary_buffer.h
@@ -22,7 +22,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI _LIBCPP_DEPRECATED_IN_CXX17 pair<_Tp*, ptrdiff_t>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI _LIBCPP_DEPRECATED_IN_CXX17 pair<_Tp*, ptrdiff_t>
get_temporary_buffer(ptrdiff_t __n) _NOEXCEPT {
pair<_Tp*, ptrdiff_t> __r(0, 0);
const ptrdiff_t __m =
diff --git a/libcxx/include/__mutex/lock_guard.h b/libcxx/include/__mutex/lock_guard.h
index ef56896..50765cd 100644
--- a/libcxx/include/__mutex/lock_guard.h
+++ b/libcxx/include/__mutex/lock_guard.h
@@ -27,13 +27,13 @@ private:
mutex_type& __m_;
public:
- _LIBCPP_NODISCARD
+ [[__nodiscard__]]
_LIBCPP_HIDE_FROM_ABI explicit lock_guard(mutex_type& __m) _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability(__m))
: __m_(__m) {
__m_.lock();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI lock_guard(mutex_type& __m, adopt_lock_t)
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI lock_guard(mutex_type& __m, adopt_lock_t)
_LIBCPP_THREAD_SAFETY_ANNOTATION(requires_capability(__m))
: __m_(__m) {}
_LIBCPP_HIDE_FROM_ABI ~lock_guard() _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()) { __m_.unlock(); }
diff --git a/libcxx/include/__mutex/unique_lock.h b/libcxx/include/__mutex/unique_lock.h
index db506f3..c404921 100644
--- a/libcxx/include/__mutex/unique_lock.h
+++ b/libcxx/include/__mutex/unique_lock.h
@@ -34,28 +34,28 @@ private:
bool __owns_;
public:
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock() _NOEXCEPT : __m_(nullptr), __owns_(false) {}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI explicit unique_lock(mutex_type& __m)
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock() _NOEXCEPT : __m_(nullptr), __owns_(false) {}
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI explicit unique_lock(mutex_type& __m)
: __m_(std::addressof(__m)), __owns_(true) {
__m_->lock();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, defer_lock_t) _NOEXCEPT
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, defer_lock_t) _NOEXCEPT
: __m_(std::addressof(__m)),
__owns_(false) {}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, try_to_lock_t)
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, try_to_lock_t)
: __m_(std::addressof(__m)), __owns_(__m.try_lock()) {}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, adopt_lock_t)
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, adopt_lock_t)
: __m_(std::addressof(__m)), __owns_(true) {}
template <class _Clock, class _Duration>
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::time_point<_Clock, _Duration>& __t)
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::time_point<_Clock, _Duration>& __t)
: __m_(std::addressof(__m)), __owns_(__m.try_lock_until(__t)) {}
template <class _Rep, class _Period>
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::duration<_Rep, _Period>& __d)
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::duration<_Rep, _Period>& __d)
: __m_(std::addressof(__m)), __owns_(__m.try_lock_for(__d)) {}
_LIBCPP_HIDE_FROM_ABI ~unique_lock() {
@@ -66,7 +66,7 @@ public:
unique_lock(unique_lock const&) = delete;
unique_lock& operator=(unique_lock const&) = delete;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(unique_lock&& __u) _NOEXCEPT
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(unique_lock&& __u) _NOEXCEPT
: __m_(__u.__m_),
__owns_(__u.__owns_) {
__u.__m_ = nullptr;
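Note (illustrative, not part of the patch): with the guard constructors carrying [[__nodiscard__]] directly, compilers that implement the attribute can flag the common mistake of constructing an unnamed temporary lock, which releases the mutex at the end of the same statement. A minimal sketch; every name below is example code, not something from this diff:

#include <mutex>

std::mutex m;
int counter = 0;

void increment() {
  // Intended: hold the lock for the whole function body.
  // Actual: the temporary guard is destroyed at the end of this statement,
  // so the increment below runs unsynchronized. A nodiscard-annotated
  // constructor lets the compiler warn about the discarded temporary.
  std::lock_guard<std::mutex>{m}; // warning: discarded nodiscard temporary
  ++counter;                      // not protected by the lock
}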
diff --git a/libcxx/include/__ranges/elements_view.h b/libcxx/include/__ranges/elements_view.h
index f159f53..989d36f 100644
--- a/libcxx/include/__ranges/elements_view.h
+++ b/libcxx/include/__ranges/elements_view.h
@@ -16,7 +16,7 @@
#include <__concepts/derived_from.h>
#include <__concepts/equality_comparable.h>
#include <__config>
-#include <__fwd/complex.h>
+#include <__fwd/get.h>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__ranges/access.h>
diff --git a/libcxx/include/__utility/forward.h b/libcxx/include/__utility/forward.h
index d5275dc..66740664 100644
--- a/libcxx/include/__utility/forward.h
+++ b/libcxx/include/__utility/forward.h
@@ -21,13 +21,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&&
forward(_LIBCPP_LIFETIMEBOUND __libcpp_remove_reference_t<_Tp>& __t) _NOEXCEPT {
return static_cast<_Tp&&>(__t);
}
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&&
forward(_LIBCPP_LIFETIMEBOUND __libcpp_remove_reference_t<_Tp>&& __t) _NOEXCEPT {
static_assert(!is_lvalue_reference<_Tp>::value, "cannot forward an rvalue as an lvalue");
return static_cast<_Tp&&>(__t);
diff --git a/libcxx/include/__utility/move.h b/libcxx/include/__utility/move.h
index b6a42db..66aec5a 100644
--- a/libcxx/include/__utility/move.h
+++ b/libcxx/include/__utility/move.h
@@ -26,7 +26,7 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __libcpp_remove_reference_t<_Tp>&&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __libcpp_remove_reference_t<_Tp>&&
move(_LIBCPP_LIFETIMEBOUND _Tp&& __t) _NOEXCEPT {
typedef _LIBCPP_NODEBUG __libcpp_remove_reference_t<_Tp> _Up;
return static_cast<_Up&&>(__t);
@@ -37,7 +37,7 @@ using __move_if_noexcept_result_t =
__conditional_t<!is_nothrow_move_constructible<_Tp>::value && is_copy_constructible<_Tp>::value, const _Tp&, _Tp&&>;
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __move_if_noexcept_result_t<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __move_if_noexcept_result_t<_Tp>
move_if_noexcept(_LIBCPP_LIFETIMEBOUND _Tp& __x) _NOEXCEPT {
return std::move(__x);
}
diff --git a/libcxx/include/array b/libcxx/include/array
index 588664ac..fde7a70 100644
--- a/libcxx/include/array
+++ b/libcxx/include/array
@@ -232,7 +232,7 @@ struct _LIBCPP_TEMPLATE_VIS array {
// capacity:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type size() const _NOEXCEPT { return _Size; }
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type max_size() const _NOEXCEPT { return _Size; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return _Size == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return _Size == 0; }
// element access:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference operator[](size_type __n) _NOEXCEPT {
@@ -340,7 +340,7 @@ struct _LIBCPP_TEMPLATE_VIS array<_Tp, 0> {
// capacity:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type size() const _NOEXCEPT { return 0; }
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type max_size() const _NOEXCEPT { return 0; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return true; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return true; }
// element access:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference operator[](size_type) _NOEXCEPT {
diff --git a/libcxx/include/barrier b/libcxx/include/barrier
index ba29ebc..abc014e 100644
--- a/libcxx/include/barrier
+++ b/libcxx/include/barrier
@@ -125,7 +125,7 @@ public:
__expected_adjustment_(0),
__completion_(std::move(__completion)),
__phase_(0) {}
- _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
+ [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
__update <= __expected_, "update is greater than the expected count for the current barrier phase");
@@ -277,7 +277,7 @@ public:
barrier(barrier const&) = delete;
barrier& operator=(barrier const&) = delete;
- _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
+ [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
return __b_.arrive(__update);
}
diff --git a/libcxx/include/deque b/libcxx/include/deque
index 759de5d..f2f6122 100644
--- a/libcxx/include/deque
+++ b/libcxx/include/deque
@@ -739,7 +739,7 @@ public:
_LIBCPP_HIDE_FROM_ABI void resize(size_type __n);
_LIBCPP_HIDE_FROM_ABI void resize(size_type __n, const value_type& __v);
_LIBCPP_HIDE_FROM_ABI void shrink_to_fit() _NOEXCEPT;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; }
// element access:
_LIBCPP_HIDE_FROM_ABI reference operator[](size_type __i) _NOEXCEPT;
diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list
index 3187b11..9a80413 100644
--- a/libcxx/include/forward_list
+++ b/libcxx/include/forward_list
@@ -756,7 +756,7 @@ public:
return const_iterator(base::__before_begin());
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT {
return base::__before_begin()->__next_ == nullptr;
}
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
diff --git a/libcxx/include/future b/libcxx/include/future
index 9158ea3..8eadbcb 100644
--- a/libcxx/include/future
+++ b/libcxx/include/future
@@ -1845,7 +1845,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool __does_policy_contain(launch __policy, launch
}
template <class _Fp, class... _Args>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI future<typename __invoke_of<__decay_t<_Fp>, __decay_t<_Args>...>::type>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI future<typename __invoke_of<__decay_t<_Fp>, __decay_t<_Args>...>::type>
async(launch __policy, _Fp&& __f, _Args&&... __args) {
typedef __async_func<__decay_t<_Fp>, __decay_t<_Args>...> _BF;
typedef typename _BF::_Rp _Rp;
@@ -1870,7 +1870,7 @@ async(launch __policy, _Fp&& __f, _Args&&... __args) {
}
template <class _Fp, class... _Args>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI future<typename __invoke_of<__decay_t<_Fp>, __decay_t<_Args>...>::type>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI future<typename __invoke_of<__decay_t<_Fp>, __decay_t<_Args>...>::type>
async(_Fp&& __f, _Args&&... __args) {
return std::async(launch::any, std::forward<_Fp>(__f), std::forward<_Args>(__args)...);
}
diff --git a/libcxx/include/limits b/libcxx/include/limits
index d55c7cd..b85c662 100644
--- a/libcxx/include/limits
+++ b/libcxx/include/limits
@@ -137,9 +137,9 @@ protected:
typedef _Tp type;
static _LIBCPP_CONSTEXPR const bool is_specialized = false;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return type(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return type(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return type(); }
static _LIBCPP_CONSTEXPR const int digits = 0;
static _LIBCPP_CONSTEXPR const int digits10 = 0;
@@ -148,8 +148,8 @@ protected:
static _LIBCPP_CONSTEXPR const bool is_integer = false;
static _LIBCPP_CONSTEXPR const bool is_exact = false;
static _LIBCPP_CONSTEXPR const int radix = 0;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(); }
static _LIBCPP_CONSTEXPR const int min_exponent = 0;
static _LIBCPP_CONSTEXPR const int min_exponent10 = 0;
@@ -161,10 +161,10 @@ protected:
static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = false;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_absent;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(); }
static _LIBCPP_CONSTEXPR const bool is_iec559 = false;
static _LIBCPP_CONSTEXPR const bool is_bounded = false;
@@ -198,15 +198,15 @@ protected:
static _LIBCPP_CONSTEXPR const int max_digits10 = 0;
static _LIBCPP_CONSTEXPR const type __min = __libcpp_compute_min<type, digits, is_signed>::value;
static _LIBCPP_CONSTEXPR const type __max = is_signed ? type(type(~0) ^ __min) : type(~0);
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); }
static _LIBCPP_CONSTEXPR const bool is_integer = true;
static _LIBCPP_CONSTEXPR const bool is_exact = true;
static _LIBCPP_CONSTEXPR const int radix = 2;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); }
static _LIBCPP_CONSTEXPR const int min_exponent = 0;
static _LIBCPP_CONSTEXPR const int min_exponent10 = 0;
@@ -218,10 +218,10 @@ protected:
static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = false;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_absent;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); }
static _LIBCPP_CONSTEXPR const bool is_iec559 = false;
static _LIBCPP_CONSTEXPR const bool is_bounded = true;
@@ -249,15 +249,15 @@ protected:
static _LIBCPP_CONSTEXPR const int max_digits10 = 0;
static _LIBCPP_CONSTEXPR const type __min = false;
static _LIBCPP_CONSTEXPR const type __max = true;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); }
static _LIBCPP_CONSTEXPR const bool is_integer = true;
static _LIBCPP_CONSTEXPR const bool is_exact = true;
static _LIBCPP_CONSTEXPR const int radix = 2;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); }
static _LIBCPP_CONSTEXPR const int min_exponent = 0;
static _LIBCPP_CONSTEXPR const int min_exponent10 = 0;
@@ -269,10 +269,10 @@ protected:
static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = false;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_absent;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); }
static _LIBCPP_CONSTEXPR const bool is_iec559 = false;
static _LIBCPP_CONSTEXPR const bool is_bounded = true;
@@ -294,15 +294,15 @@ protected:
static _LIBCPP_CONSTEXPR const int digits = __FLT_MANT_DIG__;
static _LIBCPP_CONSTEXPR const int digits10 = __FLT_DIG__;
static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __FLT_MIN__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __FLT_MAX__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __FLT_MIN__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __FLT_MAX__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
static _LIBCPP_CONSTEXPR const bool is_integer = false;
static _LIBCPP_CONSTEXPR const bool is_exact = false;
static _LIBCPP_CONSTEXPR const int radix = __FLT_RADIX__;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __FLT_EPSILON__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5F; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __FLT_EPSILON__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5F; }
static _LIBCPP_CONSTEXPR const int min_exponent = __FLT_MIN_EXP__;
static _LIBCPP_CONSTEXPR const int min_exponent10 = __FLT_MIN_10_EXP__;
@@ -314,16 +314,16 @@ protected:
static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = true;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT {
return __builtin_huge_valf();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT {
return __builtin_nanf("");
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT {
return __builtin_nansf("");
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT {
return __FLT_DENORM_MIN__;
}
@@ -351,15 +351,15 @@ protected:
static _LIBCPP_CONSTEXPR const int digits = __DBL_MANT_DIG__;
static _LIBCPP_CONSTEXPR const int digits10 = __DBL_DIG__;
static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __DBL_MIN__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __DBL_MAX__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __DBL_MIN__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __DBL_MAX__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
static _LIBCPP_CONSTEXPR const bool is_integer = false;
static _LIBCPP_CONSTEXPR const bool is_exact = false;
static _LIBCPP_CONSTEXPR const int radix = __FLT_RADIX__;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __DBL_EPSILON__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __DBL_EPSILON__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5; }
static _LIBCPP_CONSTEXPR const int min_exponent = __DBL_MIN_EXP__;
static _LIBCPP_CONSTEXPR const int min_exponent10 = __DBL_MIN_10_EXP__;
@@ -371,16 +371,16 @@ protected:
static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = true;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT {
return __builtin_huge_val();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT {
return __builtin_nan("");
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT {
return __builtin_nans("");
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT {
return __DBL_DENORM_MIN__;
}
@@ -408,15 +408,15 @@ protected:
static _LIBCPP_CONSTEXPR const int digits = __LDBL_MANT_DIG__;
static _LIBCPP_CONSTEXPR const int digits10 = __LDBL_DIG__;
static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __LDBL_MIN__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __LDBL_MAX__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __LDBL_MIN__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __LDBL_MAX__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); }
static _LIBCPP_CONSTEXPR const bool is_integer = false;
static _LIBCPP_CONSTEXPR const bool is_exact = false;
static _LIBCPP_CONSTEXPR const int radix = __FLT_RADIX__;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __LDBL_EPSILON__; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5L; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __LDBL_EPSILON__; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5L; }
static _LIBCPP_CONSTEXPR const int min_exponent = __LDBL_MIN_EXP__;
static _LIBCPP_CONSTEXPR const int min_exponent10 = __LDBL_MIN_10_EXP__;
@@ -428,16 +428,16 @@ protected:
static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = true;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT {
return __builtin_huge_vall();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT {
return __builtin_nanl("");
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT {
return __builtin_nansl("");
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT {
return __LDBL_DENORM_MIN__;
}
@@ -465,9 +465,9 @@ class _LIBCPP_TEMPLATE_VIS numeric_limits : private __libcpp_numeric_limits<_Tp>
public:
static _LIBCPP_CONSTEXPR const bool is_specialized = __base::is_specialized;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __base::min(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __base::max(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return __base::lowest(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __base::min(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __base::max(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return __base::lowest(); }
static _LIBCPP_CONSTEXPR const int digits = __base::digits;
static _LIBCPP_CONSTEXPR const int digits10 = __base::digits10;
@@ -476,10 +476,10 @@ public:
static _LIBCPP_CONSTEXPR const bool is_integer = __base::is_integer;
static _LIBCPP_CONSTEXPR const bool is_exact = __base::is_exact;
static _LIBCPP_CONSTEXPR const int radix = __base::radix;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT {
return __base::epsilon();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT {
return __base::round_error();
}
@@ -495,16 +495,16 @@ public:
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = __base::has_denorm;
static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = __base::has_denorm_loss;
_LIBCPP_SUPPRESS_DEPRECATED_POP
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT {
return __base::infinity();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT {
return __base::quiet_NaN();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT {
return __base::signaling_NaN();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT {
return __base::denorm_min();
}
diff --git a/libcxx/include/list b/libcxx/include/list
index 2aa7744..dc3b679 100644
--- a/libcxx/include/list
+++ b/libcxx/include/list
@@ -755,7 +755,7 @@ public:
_LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT;
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return base::__sz(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return base::empty(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return base::empty(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
return std::min<size_type>(base::__node_alloc_max_size(), numeric_limits<difference_type >::max());
}
diff --git a/libcxx/include/map b/libcxx/include/map
index 02bd17c..5d97538 100644
--- a/libcxx/include/map
+++ b/libcxx/include/map
@@ -1144,7 +1144,7 @@ public:
_LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
_LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
@@ -1824,7 +1824,7 @@ public:
_LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
_LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
diff --git a/libcxx/include/math.h b/libcxx/include/math.h
index 4e6304a..509ecc4 100644
--- a/libcxx/include/math.h
+++ b/libcxx/include/math.h
@@ -388,22 +388,22 @@ namespace __math {
// template on non-double overloads to make them weaker than same overloads from MSVC runtime
template <class = int>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI int fpclassify(float __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI int fpclassify(float __x) _NOEXCEPT {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x);
}
template <class = int>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI int fpclassify(double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI int fpclassify(double __x) _NOEXCEPT {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x);
}
template <class = int>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI int fpclassify(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI int fpclassify(long double __x) _NOEXCEPT {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x);
}
template <class _A1, std::__enable_if_t<std::is_integral<_A1>::value, int> = 0>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI int fpclassify(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI int fpclassify(_A1 __x) _NOEXCEPT {
return __x == 0 ? FP_ZERO : FP_NORMAL;
}
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index add8726d..cc41912 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -1813,6 +1813,15 @@ module std_private_tuple_tuple_like_no_subrange [system] {
module std_private_tuple_sfinae_helpers [system] { header "__tuple/sfinae_helpers.h" }
module std_private_tuple_tuple_element [system] { header "__tuple/tuple_element.h" }
module std_private_tuple_tuple_fwd [system] { header "__fwd/tuple.h" }
+module std_private_get_fwd [system] {
+ header "__fwd/get.h"
+ export std_private_array_array_fwd
+ export std_private_complex_complex_fwd
+ export std_private_ranges_subrange_fwd
+ export std_private_tuple_tuple_fwd
+ export std_private_utility_pair_fwd
+ export std_private_variant_fwd
+}
module std_private_tuple_tuple_indices [system] { header "__tuple/tuple_indices.h" }
module std_private_tuple_tuple_like [system] {
header "__tuple/tuple_like.h"
@@ -2103,5 +2112,6 @@ module std_private_utility_to_underlying [system] { header "__utility/
module std_private_utility_unreachable [system] { header "__utility/unreachable.h" }
module std_private_variant_monostate [system] { header "__variant/monostate.h" }
+module std_private_variant_fwd [system] { header "__fwd/variant.h" }
module std_private_vector_fwd [system] { header "__fwd/vector.h" }
diff --git a/libcxx/include/new b/libcxx/include/new
index 207e4b4..3252b0b 100644
--- a/libcxx/include/new
+++ b/libcxx/include/new
@@ -203,8 +203,8 @@ inline constexpr destroying_delete_t destroying_delete{};
#if !defined(_LIBCPP_ABI_VCRUNTIME)
-_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz) _THROW_BAD_ALLOC;
-_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, const std::nothrow_t&) _NOEXCEPT
+[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz) _THROW_BAD_ALLOC;
+[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, const std::nothrow_t&) _NOEXCEPT
_LIBCPP_NOALIAS;
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p) _NOEXCEPT;
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, const std::nothrow_t&) _NOEXCEPT;
@@ -212,8 +212,8 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, const std::nothrow_
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz) _NOEXCEPT;
# endif
-_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz) _THROW_BAD_ALLOC;
-_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, const std::nothrow_t&) _NOEXCEPT
+[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz) _THROW_BAD_ALLOC;
+[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, const std::nothrow_t&) _NOEXCEPT
_LIBCPP_NOALIAS;
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p) _NOEXCEPT;
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, const std::nothrow_t&) _NOEXCEPT;
@@ -222,8 +222,8 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz)
# endif
# ifndef _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
-_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC;
-_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void*
+[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC;
+[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void*
operator new(std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS;
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t) _NOEXCEPT;
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT;
@@ -231,9 +231,9 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, c
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT;
# endif
-_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void*
+[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void*
operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC;
-_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void*
+[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void*
operator new[](std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS;
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t) _NOEXCEPT;
_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT;
@@ -242,11 +242,11 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz,
# endif
# endif
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void*
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void*
operator new(std::size_t, void* __p) _NOEXCEPT {
return __p;
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void*
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void*
operator new[](std::size_t, void* __p) _NOEXCEPT {
return __p;
}
@@ -334,7 +334,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __libcpp_deallocate_unsized(void* __ptr, size_
}
template <class _Tp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp* __launder(_Tp* __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp* __launder(_Tp* __p) _NOEXCEPT {
static_assert(!(is_function<_Tp>::value), "can't launder functions");
static_assert(!(is_same<void, __remove_cv_t<_Tp> >::value), "can't launder cv-void");
return __builtin_launder(__p);
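With the allocation functions in <new> now spelled [[__nodiscard__]], a direct call whose result is dropped (almost always a leak) becomes diagnosable on compilers that honor the attribute. A small sketch; the function below is example code, not from this diff:

#include <new>

void allocation_sketch() {
  ::operator new(64);            // likely warns: nodiscard result discarded (and leaked)
  void* p = ::operator new(64);  // intended use: keep the pointer...
  ::operator delete(p);          // ...and pair it with operator delete
}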
diff --git a/libcxx/include/queue b/libcxx/include/queue
index 9508de9..db9ad26 100644
--- a/libcxx/include/queue
+++ b/libcxx/include/queue
@@ -372,7 +372,7 @@ public:
_LIBCPP_HIDE_FROM_ABI queue(queue&& __q, const _Alloc& __a) : c(std::move(__q.c), __a) {}
#endif // _LIBCPP_CXX03_LANG
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
_LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
_LIBCPP_HIDE_FROM_ABI reference front() { return c.front(); }
@@ -416,7 +416,7 @@ public:
swap(c, __q.c);
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; }
template <class _T1, class _OtherContainer>
friend _LIBCPP_HIDE_FROM_ABI bool
@@ -649,7 +649,7 @@ public:
#endif
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
_LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
_LIBCPP_HIDE_FROM_ABI const_reference top() const { return c.front(); }
@@ -678,7 +678,7 @@ public:
_LIBCPP_HIDE_FROM_ABI void swap(priority_queue& __q)
_NOEXCEPT_(__is_nothrow_swappable_v<container_type>&& __is_nothrow_swappable_v<value_compare>);
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; }
};
#if _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/regex b/libcxx/include/regex
index d59abb8..d6b8768 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -4577,7 +4577,7 @@ public:
// size:
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __matches_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __matches_.max_size(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; }
// element access:
_LIBCPP_HIDE_FROM_ABI difference_type length(size_type __sub = 0) const {
diff --git a/libcxx/include/scoped_allocator b/libcxx/include/scoped_allocator
index a49ff46..13e43c2 100644
--- a/libcxx/include/scoped_allocator
+++ b/libcxx/include/scoped_allocator
@@ -389,10 +389,10 @@ public:
return _Base::outer_allocator();
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI pointer allocate(size_type __n) {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pointer allocate(size_type __n) {
return allocator_traits<outer_allocator_type>::allocate(outer_allocator(), __n);
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI pointer allocate(size_type __n, const_void_pointer __hint) {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pointer allocate(size_type __n, const_void_pointer __hint) {
return allocator_traits<outer_allocator_type>::allocate(outer_allocator(), __n, __hint);
}
diff --git a/libcxx/include/set b/libcxx/include/set
index 7e9661a..b614e04 100644
--- a/libcxx/include/set
+++ b/libcxx/include/set
@@ -713,7 +713,7 @@ public:
_LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
_LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
@@ -1170,7 +1170,7 @@ public:
_LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
_LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
diff --git a/libcxx/include/stack b/libcxx/include/stack
index 90f8933..f75769f 100644
--- a/libcxx/include/stack
+++ b/libcxx/include/stack
@@ -231,7 +231,7 @@ public:
#endif
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
_LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
_LIBCPP_HIDE_FROM_ABI reference top() { return c.back(); }
_LIBCPP_HIDE_FROM_ABI const_reference top() const { return c.back(); }
@@ -273,7 +273,7 @@ public:
swap(c, __s.c);
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; }
template <class _T1, class _OtherContainer>
friend bool operator==(const stack<_T1, _OtherContainer>& __x, const stack<_T1, _OtherContainer>& __y);
diff --git a/libcxx/include/stdlib.h b/libcxx/include/stdlib.h
index a74344d..358b10c 100644
--- a/libcxx/include/stdlib.h
+++ b/libcxx/include/stdlib.h
@@ -110,19 +110,19 @@ extern "C++" {
// MSVCRT already has the correct prototype in <stdlib.h> if __cplusplus is defined
# if !defined(_LIBCPP_MSVCRT)
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long abs(long __x) _NOEXCEPT { return __builtin_labs(__x); }
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long long abs(long long __x) _NOEXCEPT { return __builtin_llabs(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long abs(long __x) _NOEXCEPT { return __builtin_labs(__x); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long long abs(long long __x) _NOEXCEPT { return __builtin_llabs(__x); }
# endif // !defined(_LIBCPP_MSVCRT)
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float abs(float __lcpp_x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float abs(float __lcpp_x) _NOEXCEPT {
return __builtin_fabsf(__lcpp_x); // Use builtins to prevent needing math.h
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double abs(double __lcpp_x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double abs(double __lcpp_x) _NOEXCEPT {
return __builtin_fabs(__lcpp_x);
}
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double abs(long double __lcpp_x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double abs(long double __lcpp_x) _NOEXCEPT {
return __builtin_fabsl(__lcpp_x);
}
diff --git a/libcxx/include/string b/libcxx/include/string
index 46c5a5a..e8c9bce 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1321,7 +1321,7 @@ public:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void shrink_to_fit() _NOEXCEPT;
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void clear() _NOEXCEPT;
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT {
return size() == 0;
}
diff --git a/libcxx/include/string_view b/libcxx/include/string_view
index cf97e3a..3b32117 100644
--- a/libcxx/include/string_view
+++ b/libcxx/include/string_view
@@ -396,7 +396,7 @@ public:
return numeric_limits<size_type>::max() / sizeof(value_type);
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return __size_ == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return __size_ == 0; }
// [string.view.access], element access
_LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __pos) const _NOEXCEPT {
diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits
index 5937d4f..26c85f2 100644
--- a/libcxx/include/type_traits
+++ b/libcxx/include/type_traits
@@ -421,7 +421,6 @@ namespace std
*/
#include <__config>
-#include <__fwd/functional.h> // This is https://llvm.org/PR56938
#include <__type_traits/add_const.h>
#include <__type_traits/add_cv.h>
#include <__type_traits/add_lvalue_reference.h>
diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index 69314ba..0743b2e 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -1204,7 +1204,7 @@ public:
return allocator_type(__table_.__node_alloc());
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
@@ -2003,7 +2003,7 @@ public:
return allocator_type(__table_.__node_alloc());
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set
index fb50f78..bd8d3ab 100644
--- a/libcxx/include/unordered_set
+++ b/libcxx/include/unordered_set
@@ -733,7 +733,7 @@ public:
return allocator_type(__table_.__node_alloc());
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
@@ -1327,7 +1327,7 @@ public:
return allocator_type(__table_.__node_alloc());
}
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
_LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
_LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
diff --git a/libcxx/include/variant b/libcxx/include/variant
index 1cac603..2fa5623 100644
--- a/libcxx/include/variant
+++ b/libcxx/include/variant
@@ -221,6 +221,7 @@ namespace std {
#include <__functional/invoke.h>
#include <__functional/operations.h>
#include <__functional/unary_function.h>
+#include <__fwd/variant.h>
#include <__memory/addressof.h>
#include <__memory/construct_at.h>
#include <__tuple/find_index.h>
@@ -307,15 +308,7 @@ __throw_bad_variant_access() {
# endif
}
-template <class... _Types>
-class _LIBCPP_TEMPLATE_VIS variant;
-
-template <class _Tp>
-struct _LIBCPP_TEMPLATE_VIS variant_size;
-
-template <class _Tp>
-inline constexpr size_t variant_size_v = variant_size<_Tp>::value;
-
+// variant_size
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS variant_size<const _Tp> : variant_size<_Tp> {};
@@ -328,12 +321,7 @@ struct _LIBCPP_TEMPLATE_VIS variant_size<const volatile _Tp> : variant_size<_Tp>
template <class... _Types>
struct _LIBCPP_TEMPLATE_VIS variant_size<variant<_Types...>> : integral_constant<size_t, sizeof...(_Types)> {};
-template <size_t _Ip, class _Tp>
-struct _LIBCPP_TEMPLATE_VIS variant_alternative;
-
-template <size_t _Ip, class _Tp>
-using variant_alternative_t = typename variant_alternative<_Ip, _Tp>::type;
-
+// variant_alternative
template <size_t _Ip, class _Tp>
struct _LIBCPP_TEMPLATE_VIS variant_alternative<_Ip, const _Tp> : add_const<variant_alternative_t<_Ip, _Tp>> {};
@@ -349,8 +337,6 @@ struct _LIBCPP_TEMPLATE_VIS variant_alternative<_Ip, variant<_Types...>> {
using type = __type_pack_element<_Ip, _Types...>;
};
-inline constexpr size_t variant_npos = static_cast<size_t>(-1);
-
template <size_t _NumAlternatives>
_LIBCPP_HIDE_FROM_ABI constexpr auto __choose_index_type() {
# ifdef _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION
@@ -370,9 +356,6 @@ template <class _IndexType>
constexpr _IndexType __variant_npos = static_cast<_IndexType>(-1);
template <class... _Types>
-class _LIBCPP_TEMPLATE_VIS variant;
-
-template <class... _Types>
_LIBCPP_HIDE_FROM_ABI constexpr variant<_Types...>& __as_variant(variant<_Types...>& __vs) noexcept {
return __vs;
}
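The forward declarations deleted above (variant, variant_size, variant_size_v, variant_alternative, variant_alternative_t, variant_npos) presumably now live in the __fwd/variant.h header pulled in at the top of this file; that header itself is not shown in this excerpt. A hypothetical outline reconstructed from the removed lines, with version guards and the usual header boilerplate omitted:

// sketch of __fwd/variant.h -- reconstructed for illustration, not the actual header contents
#include <__config>
#include <cstddef>

_LIBCPP_BEGIN_NAMESPACE_STD

template <class... _Types>
class _LIBCPP_TEMPLATE_VIS variant;

template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS variant_size;

template <class _Tp>
inline constexpr size_t variant_size_v = variant_size<_Tp>::value;

template <size_t _Ip, class _Tp>
struct _LIBCPP_TEMPLATE_VIS variant_alternative;

template <size_t _Ip, class _Tp>
using variant_alternative_t = typename variant_alternative<_Ip, _Tp>::type;

inline constexpr size_t variant_npos = static_cast<size_t>(-1);

_LIBCPP_END_NAMESPACE_STD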
diff --git a/libcxx/include/vector b/libcxx/include/vector
index fc0a486..4720f8e 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -636,7 +636,7 @@ public:
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT {
return static_cast<size_type>(__end_cap() - this->__begin_);
}
- _LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT {
return this->__begin_ == this->__end_;
}
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT;
@@ -2033,7 +2033,7 @@ public:
return __internal_cap_to_external(__cap());
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type size() const _NOEXCEPT { return __size_; }
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT {
+ [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT {
return __size_ == 0;
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void reserve(size_type __n);
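Across the container headers above, empty() keeps its nodiscard marking under the explicit [[__nodiscard__]] spelling; the attribute exists largely to catch the classic empty()/clear() mix-up. A two-line illustration (example code, not from this diff):

#include <vector>

void clear_all(std::vector<int>& v) {
  v.empty();  // warning: nodiscard result of empty() discarded -- the author meant clear()
  v.clear();  // the intended call
}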
diff --git a/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp
index d89984a..6636120 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp
@@ -20,8 +20,8 @@
#include "min_allocator.h"
TEST_CONSTEXPR_CXX20 bool test() {
- test_hash_enabled_for_type<std::vector<bool> >();
- test_hash_enabled_for_type<std::vector<bool, min_allocator<bool>>>();
+ test_hash_enabled<std::vector<bool> >();
+ test_hash_enabled<std::vector<bool, min_allocator<bool>>>();
return true;
}
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp
index 2aab698..e3eae8b 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp
@@ -22,8 +22,8 @@
int main(int, char**) {
test_library_hash_specializations_available();
{
- test_hash_enabled_for_type<std::error_code>();
- test_hash_enabled_for_type<std::error_condition>();
+ test_hash_enabled<std::error_code>();
+ test_hash_enabled<std::error_condition>();
}
return 0;
diff --git a/libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp b/libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp
index 7aa5dc8..fff5f9b 100644
--- a/libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp
@@ -33,7 +33,7 @@ void test_hash() {
assert(h == std::hash<T*>()(&obj));
}
- test_hash_enabled_for_type<std::experimental::observer_ptr<T>>();
+ test_hash_enabled<std::experimental::observer_ptr<T>>();
}
struct Bar {};
diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp
index dd28c8f..6cc64e1 100644
--- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp
@@ -20,7 +20,7 @@ namespace fs = std::filesystem;
int main(int, char**) {
test_library_hash_specializations_available();
- test_hash_enabled_for_type<fs::path>();
+ test_hash_enabled<fs::path>();
return 0;
}
diff --git a/libcxx/test/std/numerics/c.math/signbit.pass.cpp b/libcxx/test/std/numerics/c.math/signbit.pass.cpp
index c85033e3..a8a566f 100644
--- a/libcxx/test/std/numerics/c.math/signbit.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/signbit.pass.cpp
@@ -70,9 +70,22 @@ struct TestInt {
}
};
+template <typename T>
+struct ConvertibleTo {
+ operator T() const { return T(); }
+};
+
int main(int, char**) {
types::for_each(types::floating_point_types(), TestFloat());
types::for_each(types::integral_types(), TestInt());
+ // Make sure we can call `std::signbit` with convertible types. This checks
+ // whether overloads for all cv-unqualified floating-point types are working
+ // as expected.
+ {
+ assert(!std::signbit(ConvertibleTo<float>()));
+ assert(!std::signbit(ConvertibleTo<double>()));
+ assert(!std::signbit(ConvertibleTo<long double>()));
+ }
return 0;
}
diff --git a/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp b/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
index 611f95f..643c6bec 100644
--- a/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
@@ -53,18 +53,18 @@ struct std::char_traits<MyChar> {
int main(int, char**) {
test_library_hash_specializations_available();
{
- test_hash_enabled_for_type<std::string>();
+ test_hash_enabled<std::string>();
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
- test_hash_enabled_for_type<std::wstring>();
+ test_hash_enabled<std::wstring>();
#endif
#ifndef TEST_HAS_NO_CHAR8_T
- test_hash_enabled_for_type<std::u8string>();
+ test_hash_enabled<std::u8string>();
#endif
- test_hash_enabled_for_type<std::u16string>();
- test_hash_enabled_for_type<std::u32string>();
- test_hash_enabled_for_type<std::basic_string<char, std::char_traits<char>, test_allocator<char>>>();
- test_hash_disabled_for_type<std::basic_string<MyChar, std::char_traits<MyChar>, std::allocator<MyChar>>>();
- test_hash_disabled_for_type<std::basic_string<char, constexpr_char_traits<char>, std::allocator<char>>>();
+ test_hash_enabled<std::u16string>();
+ test_hash_enabled<std::u32string>();
+ test_hash_enabled<std::basic_string<char, std::char_traits<char>, test_allocator<char>>>();
+ test_hash_disabled<std::basic_string<MyChar, std::char_traits<MyChar>, std::allocator<MyChar>>>();
+ test_hash_disabled<std::basic_string<char, constexpr_char_traits<char>, std::allocator<char>>>();
}
return 0;
diff --git a/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp b/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
index b2ffd20..13abb94 100644
--- a/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
@@ -53,17 +53,17 @@ struct std::char_traits<MyChar> {
int main(int, char**) {
test_library_hash_specializations_available();
{
- test_hash_enabled_for_type<std::string_view>();
+ test_hash_enabled<std::string_view>();
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
- test_hash_enabled_for_type<std::wstring_view>();
+ test_hash_enabled<std::wstring_view>();
#endif
#ifndef TEST_HAS_NO_CHAR8_T
- test_hash_enabled_for_type<std::u8string_view>();
+ test_hash_enabled<std::u8string_view>();
#endif
- test_hash_enabled_for_type<std::u16string_view>();
- test_hash_enabled_for_type<std::u32string_view>();
- test_hash_disabled_for_type<std::basic_string_view<MyChar, std::char_traits<MyChar>>>();
- test_hash_disabled_for_type<std::basic_string_view<char, constexpr_char_traits<char>>>();
+ test_hash_enabled<std::u16string_view>();
+ test_hash_enabled<std::u32string_view>();
+ test_hash_disabled<std::basic_string_view<MyChar, std::char_traits<MyChar>>>();
+ test_hash_disabled<std::basic_string_view<char, constexpr_char_traits<char>>>();
}
return 0;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp
index 62c8c74..98caff9 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp
@@ -24,7 +24,7 @@
int main(int, char**) {
test_library_hash_specializations_available();
{
- test_hash_enabled_for_type<std::thread::id>();
+ test_hash_enabled<std::thread::id>();
}
return 0;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp
index 0c3915b..c6d54a8 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp
@@ -40,8 +40,8 @@ int main(int, char**)
}
#if TEST_STD_VER >= 11
{
- test_hash_enabled_for_type<std::shared_ptr<int>>();
- test_hash_enabled_for_type<std::shared_ptr<A>>();
+ test_hash_enabled<std::shared_ptr<int>>();
+ test_hash_enabled<std::shared_ptr<A>>();
}
#endif
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp
index 707038e..32fc949 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp
@@ -35,16 +35,16 @@ void test_enabled_with_deleter() {
using RawDel = typename std::decay<Del>::type;
RawDel d(1);
UPtr p(nullptr, std::forward<Del>(d));
- test_hash_enabled_for_type<UPtr>(p);
- test_hash_enabled_for_type<pointer>();
+ test_hash_enabled<UPtr>(p);
+ test_hash_enabled<pointer>();
}
template <class ValueT, class Del>
void test_disabled_with_deleter() {
using UPtr = std::unique_ptr<ValueT, Del>;
using pointer = typename UPtr::pointer;
- test_hash_disabled_for_type<UPtr>();
- test_hash_disabled_for_type<pointer>();
+ test_hash_disabled<UPtr>();
+ test_hash_disabled<pointer>();
}
template <class T>
diff --git a/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp b/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
index ae14b57..54cf407 100644
--- a/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
@@ -63,16 +63,16 @@ int main(int, char**)
assert(std::hash<optional<T>>{}(opt) == std::hash<T>{}(*opt));
}
{
- test_hash_enabled_for_type<std::optional<int> >();
- test_hash_enabled_for_type<std::optional<int*> >();
- test_hash_enabled_for_type<std::optional<const int> >();
- test_hash_enabled_for_type<std::optional<int* const> >();
+ test_hash_enabled<std::optional<int> >();
+ test_hash_enabled<std::optional<int*> >();
+ test_hash_enabled<std::optional<const int> >();
+ test_hash_enabled<std::optional<int* const> >();
- test_hash_disabled_for_type<std::optional<A>>();
- test_hash_disabled_for_type<std::optional<const A>>();
+ test_hash_disabled<std::optional<A>>();
+ test_hash_disabled<std::optional<const A>>();
- test_hash_enabled_for_type<std::optional<B>>();
- test_hash_enabled_for_type<std::optional<const B>>();
+ test_hash_enabled<std::optional<B>>();
+ test_hash_enabled<std::optional<const B>>();
}
return 0;
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp
index 0e34a5f..c2dc2ca 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp
@@ -22,10 +22,10 @@
int main(int, char**) {
test_library_hash_specializations_available();
{
- test_hash_enabled_for_type<std::bitset<0> >();
- test_hash_enabled_for_type<std::bitset<1> >();
- test_hash_enabled_for_type<std::bitset<1024> >();
- test_hash_enabled_for_type<std::bitset<100000> >();
+ test_hash_enabled<std::bitset<0> >();
+ test_hash_enabled<std::bitset<1> >();
+ test_hash_enabled<std::bitset<1024> >();
+ test_hash_enabled<std::bitset<100000> >();
}
return 0;
diff --git a/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp
index a361758..9c0de17 100644
--- a/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp
@@ -34,7 +34,7 @@ int main(int, char**)
}
#if TEST_STD_VER >= 11
{
- test_hash_enabled_for_type<std::type_index>(std::type_index(typeid(int)));
+ test_hash_enabled<std::type_index>(std::type_index(typeid(int)));
}
#endif
diff --git a/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp b/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp
index ffd5f82..656b1d8 100644
--- a/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp
@@ -103,7 +103,7 @@ void test_hash_monostate() {
static_assert(std::is_copy_constructible<H>::value, "");
}
{
- test_hash_enabled_for_type<std::monostate>();
+ test_hash_enabled<std::monostate>();
}
}
@@ -131,16 +131,16 @@ struct std::hash<B> {
void test_hash_variant_enabled() {
{
- test_hash_enabled_for_type<std::variant<int> >();
- test_hash_enabled_for_type<std::variant<int*, long, double, const int> >();
+ test_hash_enabled<std::variant<int> >();
+ test_hash_enabled<std::variant<int*, long, double, const int> >();
}
{
- test_hash_disabled_for_type<std::variant<int, A>>();
- test_hash_disabled_for_type<std::variant<const A, void*>>();
+ test_hash_disabled<std::variant<int, A>>();
+ test_hash_disabled<std::variant<const A, void*>>();
}
{
- test_hash_enabled_for_type<std::variant<int, B>>();
- test_hash_enabled_for_type<std::variant<const B, int>>();
+ test_hash_enabled<std::variant<int, B>>();
+ test_hash_enabled<std::variant<const B, int>>();
}
}
diff --git a/libcxx/test/support/poisoned_hash_helper.h b/libcxx/test/support/poisoned_hash_helper.h
index a073350..93b579d 100644
--- a/libcxx/test/support/poisoned_hash_helper.h
+++ b/libcxx/test/support/poisoned_hash_helper.h
@@ -10,131 +10,47 @@
#ifndef SUPPORT_POISONED_HASH_HELPER_H
#define SUPPORT_POISONED_HASH_HELPER_H
+#include <functional>
#include <cassert>
#include <cstddef>
#include <type_traits>
#include <utility>
#include "test_macros.h"
-#include "test_workarounds.h"
+#include "type_algorithms.h"
-#if TEST_STD_VER < 11
-#error this header may only be used in C++11 or newer
-#endif
-
-template <class ...Args> struct TypeList;
-
-// Test that the specified Hash meets the requirements of an enabled hash
-template <class Hash, class Key, class InputKey = Key>
-TEST_CONSTEXPR_CXX20 void test_hash_enabled(InputKey const& key = InputKey{});
-
-template <class T, class InputKey = T>
-TEST_CONSTEXPR_CXX20 void test_hash_enabled_for_type(InputKey const& key = InputKey{}) {
- return test_hash_enabled<std::hash<T>, T, InputKey>(key);
+template <class Hash, class Key, class Res = decltype(std::declval<Hash&>()(std::declval<Key>()))>
+constexpr bool can_hash_impl(int) {
+ return std::is_same<Res, std::size_t>::value;
}
-
-// Test that the specified Hash meets the requirements of a disabled hash.
-template <class Hash, class Key>
-void test_hash_disabled();
-
-template <class T>
-void test_hash_disabled_for_type() {
- return test_hash_disabled<std::hash<T>, T>();
+template <class, class>
+constexpr bool can_hash_impl(long) {
+ return false;
}
-
-namespace PoisonedHashDetail {
- enum Enum {};
- enum EnumClass : bool {};
- struct Class {};
+template <class Hash, class Key>
+constexpr bool can_hash() {
+ return can_hash_impl<Hash, Key>(0);
}
-// Each header that declares the template hash provides enabled
-// specializations of hash for nullptr t and all cv-unqualified
-// arithmetic, enumeration, and pointer types.
-using LibraryHashTypes = TypeList<
-#if TEST_STD_VER > 14
- decltype(nullptr),
-#endif
- bool,
- char,
- signed char,
- unsigned char,
-#ifndef TEST_HAS_NO_WIDE_CHARACTERS
- wchar_t,
-#endif
- char16_t,
- char32_t,
- short,
- unsigned short,
- int,
- unsigned int,
- long,
- unsigned long,
- long long,
- unsigned long long,
-#ifndef TEST_HAS_NO_INT128
- __int128_t,
- __uint128_t,
-#endif
- float,
- double,
- long double,
- PoisonedHashDetail::Enum,
- PoisonedHashDetail::EnumClass,
- void*,
- void const*,
- PoisonedHashDetail::Class*
- >;
-
-
-// Test that each of the library hash specializations for arithmetic types,
-// enum types, and pointer types are available and enabled.
-template <class Types = LibraryHashTypes>
-void test_library_hash_specializations_available(Types = Types{});
-
-
-namespace PoisonedHashDetail {
-
-template <class T, class = typename T::foo_bar_baz>
-constexpr bool instantiate(int) { return true; }
-template <class> constexpr bool instantiate(long) { return true; }
-template <class T> constexpr bool instantiate() { return instantiate<T>(0); }
-
template <class To>
struct ConvertibleToSimple {
- operator To() const {
- return To{};
- }
+ operator To() const { return To{}; }
};
template <class To>
struct ConvertibleTo {
To to{};
operator To&() & { return to; }
- operator To const&() const & { return to; }
+ operator To const&() const& { return to; }
operator To&&() && { return std::move(to); }
- operator To const&&() const && { return std::move(to); }
+ operator To const&&() const&& { return std::move(to); }
};
-template <class Hasher, class Key, class Res = decltype(std::declval<Hasher&>()(std::declval<Key>()))>
-constexpr bool can_hash(int) {
- return std::is_same<Res, std::size_t>::value;
-}
-template <class, class>
-constexpr bool can_hash(long) {
- return false;
-}
-template <class Hasher, class Key>
-constexpr bool can_hash() {
- return can_hash<Hasher, Key>(0);
-}
-} // namespace PoisonedHashDetail
-
-template <class Hash, class Key, class InputKey>
-TEST_CONSTEXPR_CXX20 void test_hash_enabled(InputKey const& key) {
- using namespace PoisonedHashDetail;
-
+// Test that the specified Hash meets the requirements of an enabled hash
+template <class Key, class Hash = std::hash<Key>>
+TEST_CONSTEXPR_CXX20 void test_hash_enabled(Key const& key = Key{}) {
static_assert(std::is_destructible<Hash>::value, "");
+
// Enabled hash requirements
static_assert(std::is_default_constructible<Hash>::value, "");
static_assert(std::is_copy_constructible<Hash>::value, "");
@@ -167,13 +83,11 @@ TEST_CONSTEXPR_CXX20 void test_hash_enabled(InputKey const& key) {
const Hash h{};
assert(h(key) == h(key));
-
}
-template <class Hash, class Key>
+// Test that the specified Hash meets the requirements of a disabled hash.
+template <class Key, class Hash = std::hash<Key>>
void test_hash_disabled() {
- using namespace PoisonedHashDetail;
-
// Disabled hash requirements
static_assert(!std::is_default_constructible<Hash>::value, "");
static_assert(!std::is_copy_constructible<Hash>::value, "");
@@ -181,11 +95,8 @@ void test_hash_disabled() {
static_assert(!std::is_copy_assignable<Hash>::value, "");
static_assert(!std::is_move_assignable<Hash>::value, "");
- static_assert(!std::is_function<
- typename std::remove_pointer<
- typename std::remove_reference<Hash>::type
- >::type
- >::value, "");
+ static_assert(
+ !std::is_function<typename std::remove_pointer<typename std::remove_reference<Hash>::type>::type>::value, "");
// Hashable requirements
static_assert(!can_hash<Hash, Key&>(), "");
@@ -205,41 +116,33 @@ void test_hash_disabled() {
static_assert(!can_hash<Hash, ConvertibleTo<Key> const&&>(), "");
}
+enum Enum {};
+enum EnumClass : bool {};
+struct Class {};
-template <class First, class ...Rest>
-struct TypeList<First, Rest...> {
- template <template <class> class Trait, bool Expect = true>
- static constexpr bool assertTrait() {
- static_assert(Trait<First>::value == Expect, "");
- return TypeList<Rest...>::template assertTrait<Trait, Expect>();
- }
-
- template <class Trait>
- static void applyTrait() {
- Trait::template apply<First>();
- TypeList<Rest...>::template applyTrait<Trait>();
- }
-};
+// Each header that declares the std::hash template provides enabled
+// specializations of std::hash for std::nullptr_t and all cv-unqualified
+// arithmetic, enumeration, and pointer types.
+#if TEST_STD_VER >= 17
+using MaybeNullptr = types::type_list<std::nullptr_t>;
+#else
+using MaybeNullptr = types::type_list<>;
+#endif
+using LibraryHashTypes = types::
+ concatenate_t<types::arithmetic_types, types::type_list<Enum, EnumClass, void*, void const*, Class*>, MaybeNullptr>;
-template <>
-struct TypeList<> {
- template <template <class> class Trait, bool Expect = true>
- static constexpr bool assertTrait() {
- return true;
+struct TestHashEnabled {
+ template <class T>
+ void operator()() const {
+ test_hash_enabled<T>();
}
- template <class Trait>
- static void applyTrait() {}
-};
-
-
-struct TestLibraryTrait {
- template <class Type>
- static void apply() { test_hash_enabled<std::hash<Type>, Type>(); }
};
-template <class Types>
-void test_library_hash_specializations_available(Types) {
- Types::template applyTrait<TestLibraryTrait >();
+// Test that each of the library hash specializations for arithmetic types,
+// enum types, and pointer types are available and enabled.
+template <class Types = LibraryHashTypes>
+void test_library_hash_specializations_available() {
+ types::for_each(Types(), TestHashEnabled());
}
#endif // SUPPORT_POISONED_HASH_HELPER_H
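
The rewritten helper detects whether a hasher is usable through expression SFINAE: the int overload is viable only when invoking the hash on the key is well-formed and yields std::size_t, otherwise the long fallback reports false. A minimal sketch of that detection pattern outside the support header, assuming the library's disabled std::hash specializations follow the C++17 poisoned-hash rules (libc++'s do); NotHashable is an illustrative type:

  #include <cstddef>
  #include <functional>
  #include <type_traits>
  #include <utility>

  // Chosen when invoking Hash on Key is well-formed and yields std::size_t.
  template <class Hash, class Key,
            class Res = decltype(std::declval<Hash&>()(std::declval<Key>()))>
  constexpr bool can_hash_impl(int) {
    return std::is_same<Res, std::size_t>::value;
  }
  // Fallback when the call expression is ill-formed (a disabled hash).
  template <class, class>
  constexpr bool can_hash_impl(long) {
    return false;
  }
  template <class Hash, class Key>
  constexpr bool can_hash() {
    return can_hash_impl<Hash, Key>(0);
  }

  struct NotHashable {}; // no enabled std::hash specialization

  static_assert(can_hash<std::hash<int>, int>(), "");
  static_assert(!can_hash<std::hash<NotHashable>, NotHashable>(), "");
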
diff --git a/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp b/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp
deleted file mode 100644
index 8145074..0000000
--- a/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// Test that the header `poisoned_hash_helper.h` doesn't include any
-// headers that provide hash<T> specializations. This is required so that the
-// 'test_library_hash_specializations_available()' function returns false
-// by default, unless a STL header providing hash has already been included.
-
-#include "poisoned_hash_helper.h"
-
-#include "test_macros.h"
-
-template <class T, std::size_t = sizeof(T)>
-constexpr bool is_complete_imp(int) { return true; }
-template <class> constexpr bool is_complete_imp(long) { return false; }
-template <class T> constexpr bool is_complete() { return is_complete_imp<T>(0); }
-
-template <class T> struct has_complete_hash {
- enum { value = is_complete<std::hash<T> >() };
-};
-
-int main(int, char**) {
- static_assert(LibraryHashTypes::assertTrait<has_complete_hash, false>(), "");
-
- return 0;
-}
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 5f00ead..b00a594 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -142,6 +142,30 @@ private:
size_t size;
};
+// A chunk for ARM64EC auxiliary IAT.
+class AuxImportChunk : public NonSectionChunk {
+public:
+ explicit AuxImportChunk(ImportFile *file) : file(file) {
+ setAlignment(sizeof(uint64_t));
+ }
+ size_t getSize() const override { return sizeof(uint64_t); }
+
+ void writeTo(uint8_t *buf) const override {
+ uint64_t impchkVA = 0;
+ if (file->impchkThunk)
+ impchkVA = file->impchkThunk->getRVA() + file->ctx.config.imageBase;
+ write64le(buf, impchkVA);
+ }
+
+ void getBaserels(std::vector<Baserel> *res) override {
+ if (file->impchkThunk)
+ res->emplace_back(rva, file->ctx.config.machine);
+ }
+
+private:
+ ImportFile *file;
+};
+
static std::vector<std::vector<DefinedImportData *>>
binImports(COFFLinkerContext &ctx,
const std::vector<DefinedImportData *> &imports) {
@@ -160,7 +184,15 @@ binImports(COFFLinkerContext &ctx,
// Sort symbols by name for each group.
std::vector<DefinedImportData *> &syms = kv.second;
llvm::sort(syms, [](DefinedImportData *a, DefinedImportData *b) {
- return a->getName() < b->getName();
+ auto getBaseName = [](DefinedImportData *sym) {
+ StringRef name = sym->getName();
+ name.consume_front("__imp_");
+ // Skip aux_ part of ARM64EC function symbol name.
+ if (sym->file->impchkThunk)
+ name.consume_front("aux_");
+ return name;
+ };
+ return getBaseName(a) < getBaseName(b);
});
v.push_back(std::move(syms));
}
@@ -687,16 +719,24 @@ void IdataContents::create(COFFLinkerContext &ctx) {
if (s->getExternalName().empty()) {
lookups.push_back(make<OrdinalOnlyChunk>(ctx, ord));
addresses.push_back(make<OrdinalOnlyChunk>(ctx, ord));
- continue;
+ } else {
+ auto *c = make<HintNameChunk>(s->getExternalName(), ord);
+ lookups.push_back(make<LookupChunk>(ctx, c));
+ addresses.push_back(make<LookupChunk>(ctx, c));
+ hints.push_back(c);
+ }
+
+ if (s->file->impECSym) {
+ auto chunk = make<AuxImportChunk>(s->file);
+ auxIat.push_back(chunk);
+ s->file->impECSym->setLocation(chunk);
}
- auto *c = make<HintNameChunk>(s->getExternalName(), ord);
- lookups.push_back(make<LookupChunk>(ctx, c));
- addresses.push_back(make<LookupChunk>(ctx, c));
- hints.push_back(c);
}
// Terminate with null values.
lookups.push_back(make<NullChunk>(ctx.config.wordsize));
addresses.push_back(make<NullChunk>(ctx.config.wordsize));
+ if (ctx.config.machine == ARM64EC)
+ auxIat.push_back(make<NullChunk>(ctx.config.wordsize));
for (int i = 0, e = syms.size(); i < e; ++i)
syms[i]->setLocation(addresses[base + i]);
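
The new sort key keeps the regular and auxiliary IAT entries of the same import adjacent by stripping the __imp_ prefix and, for ARM64EC function imports, the aux_ prefix. A self-contained stand-in for that key normalization using std::string_view (the real code uses llvm::StringRef::consume_front and keys the aux_ stripping off impchkThunk):

  #include <cassert>
  #include <string_view>

  std::string_view importBaseName(std::string_view name, bool isArm64ECFunc) {
    auto consume = [&](std::string_view prefix) {
      if (name.substr(0, prefix.size()) == prefix)
        name.remove_prefix(prefix.size());
    };
    consume("__imp_");
    if (isArm64ECFunc)
      consume("aux_"); // keep __imp_func and __imp_aux_func adjacent
    return name;
  }

  int main() {
    assert(importBaseName("__imp_func", true) == "func");
    assert(importBaseName("__imp_aux_func", true) == "func");
    assert(importBaseName("__imp_data", false) == "data");
    return 0;
  }
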
diff --git a/lld/COFF/DLL.h b/lld/COFF/DLL.h
index 7cf71f5..48b0f17 100644
--- a/lld/COFF/DLL.h
+++ b/lld/COFF/DLL.h
@@ -31,6 +31,7 @@ public:
std::vector<Chunk *> addresses;
std::vector<Chunk *> hints;
std::vector<Chunk *> dllNames;
+ std::vector<Chunk *> auxIat;
};
// Windows-specific.
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index a1fe644..9994639 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2447,6 +2447,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
ctx.symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0);
ctx.symtab.addAbsolute("__arm64x_redirection_metadata", 0);
ctx.symtab.addAbsolute("__arm64x_redirection_metadata_count", 0);
+ ctx.symtab.addAbsolute("__hybrid_auxiliary_iat", 0);
ctx.symtab.addAbsolute("__hybrid_code_map", 0);
ctx.symtab.addAbsolute("__hybrid_code_map_count", 0);
ctx.symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0);
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 3dbdf8f..5692204 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -1071,19 +1071,39 @@ void ImportFile::parse() {
this->hdr = hdr;
externalName = extName;
- impSym = ctx.symtab.addImportData(impName, this);
+ bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
+
+ if (ctx.config.machine != ARM64EC) {
+ impSym = ctx.symtab.addImportData(impName, this, location);
+ } else {
+ // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
+ // which holds addresses that are guaranteed to be callable directly from
+ // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
+ // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
+ // data imports, the naming is reversed.
+ StringRef auxImpName = saver().save("__imp_aux_" + name);
+ if (isCode) {
+ impSym = ctx.symtab.addImportData(auxImpName, this, location);
+ impECSym = ctx.symtab.addImportData(impName, this, auxLocation);
+ } else {
+ impSym = ctx.symtab.addImportData(impName, this, location);
+ impECSym = ctx.symtab.addImportData(auxImpName, this, auxLocation);
+ }
+ if (!impECSym)
+ return;
+ }
// If this was a duplicate, we logged an error but may continue;
// in this case, impSym is nullptr.
if (!impSym)
return;
if (hdr->getType() == llvm::COFF::IMPORT_CONST)
- static_cast<void>(ctx.symtab.addImportData(name, this));
+ static_cast<void>(ctx.symtab.addImportData(name, this, location));
// If type is function, we need to create a thunk which jump to an
// address pointed by the __imp_ symbol. (This allows you to call
// DLL functions just like regular non-DLL functions.)
- if (hdr->getType() == llvm::COFF::IMPORT_CODE) {
+ if (isCode) {
if (ctx.config.machine != ARM64EC) {
thunkSym = ctx.symtab.addImportThunk(name, impSym, makeImportThunk());
} else {
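
The swapped naming for ARM64EC function imports is easy to misread, so here is a small illustration of which symbol ends up bound to which table; arm64ECImportSymbols is a hypothetical helper that only mirrors the name selection above, not lld's actual API:

  #include <cassert>
  #include <string>
  #include <utility>

  // Returns {symbol bound to the regular IAT, symbol bound to the auxiliary IAT}.
  // Function imports swap the names; data imports keep the natural mapping.
  std::pair<std::string, std::string>
  arm64ECImportSymbols(const std::string &name, bool isCode) {
    std::string imp = "__imp_" + name;
    std::string impAux = "__imp_aux_" + name;
    return isCode ? std::make_pair(impAux, imp) : std::make_pair(imp, impAux);
  }

  int main() {
    assert(arm64ECImportSymbols("func", /*isCode=*/true).second == "__imp_func");
    assert(arm64ECImportSymbols("data", /*isCode=*/false).first == "__imp_data");
    return 0;
  }
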
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 3b83701..8140a03 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -362,6 +362,10 @@ public:
const coff_import_header *hdr;
Chunk *location = nullptr;
+ // Auxiliary IAT symbol and chunk on ARM64EC.
+ DefinedImportData *impECSym = nullptr;
+ Chunk *auxLocation = nullptr;
+
// We want to eliminate dllimported symbols if no one actually refers to them.
// These "Live" bits are used to keep track of which import library members
// are actually in use.
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index a6575ec..582a8562 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -584,7 +584,7 @@ void SymbolTable::initializeECThunks() {
Symbol *sym = exitThunks.lookup(file->thunkSym);
if (!sym)
- sym = exitThunks.lookup(file->impSym);
+ sym = exitThunks.lookup(file->impECSym);
file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym);
}
}
@@ -785,11 +785,12 @@ Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
return s;
}
-DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f) {
+DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f,
+ Chunk *&location) {
auto [s, wasInserted] = insert(n, nullptr);
s->isUsedInRegularObj = true;
if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
- replaceSymbol<DefinedImportData>(s, n, f);
+ replaceSymbol<DefinedImportData>(s, n, f, location);
return cast<DefinedImportData>(s);
}
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index 13e151e..bf97cf4 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -103,7 +103,8 @@ public:
Symbol *addCommon(InputFile *f, StringRef n, uint64_t size,
const llvm::object::coff_symbol_generic *s = nullptr,
CommonChunk *c = nullptr);
- DefinedImportData *addImportData(StringRef n, ImportFile *f);
+ DefinedImportData *addImportData(StringRef n, ImportFile *f,
+ Chunk *&location);
Symbol *addImportThunk(StringRef name, DefinedImportData *s,
ImportThunkChunk *chunk);
void addLibcall(StringRef name);
diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index 724330e..2df60a0 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -354,23 +354,23 @@ public:
// table in an output. The former has "__imp_" prefix.
class DefinedImportData : public Defined {
public:
- DefinedImportData(StringRef n, ImportFile *f)
- : Defined(DefinedImportDataKind, n), file(f) {
- }
+ DefinedImportData(StringRef n, ImportFile *file, Chunk *&location)
+ : Defined(DefinedImportDataKind, n), file(file), location(location) {}
static bool classof(const Symbol *s) {
return s->kind() == DefinedImportDataKind;
}
- uint64_t getRVA() { return file->location->getRVA(); }
- Chunk *getChunk() { return file->location; }
- void setLocation(Chunk *addressTable) { file->location = addressTable; }
+ uint64_t getRVA() { return getChunk()->getRVA(); }
+ Chunk *getChunk() { return location; }
+ void setLocation(Chunk *addressTable) { location = addressTable; }
StringRef getDLLName() { return file->dllName; }
StringRef getExternalName() { return file->externalName; }
uint16_t getOrdinal() { return file->hdr->OrdinalHint; }
ImportFile *file;
+ Chunk *&location;
// This is a pointer to the synthetic symbol associated with the load thunk
// for this symbol that will be called if the DLL is delay-loaded. This is
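
Storing location as Chunk *& means each DefinedImportData writes through to one of the two pointer slots in its ImportFile (location or auxLocation), so the regular and auxiliary import symbols can be placed independently. A minimal sketch of that reference-to-slot pattern with illustrative stand-in types:

  #include <cassert>

  struct Chunk {};

  struct File {                    // stands in for ImportFile
    Chunk *location = nullptr;     // regular IAT slot
    Chunk *auxLocation = nullptr;  // ARM64EC auxiliary IAT slot
  };

  struct ImportSym {               // stands in for DefinedImportData
    ImportSym(File &f, Chunk *&slot) : file(f), location(slot) {}
    void setLocation(Chunk *c) { location = c; } // writes into the bound slot
    File &file;
    Chunk *&location;
  };

  int main() {
    File f;
    Chunk regular, aux;
    ImportSym impSym(f, f.location), impECSym(f, f.auxLocation);
    impSym.setLocation(&regular);
    impECSym.setLocation(&aux);
    assert(f.location == &regular && f.auxLocation == &aux);
    return 0;
  }
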
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index b589a16..9a80400 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -914,6 +914,8 @@ void Writer::addSyntheticIdata() {
if (!idata.hints.empty())
add(".idata$6", idata.hints);
add(".idata$7", idata.dllNames);
+ if (!idata.auxIat.empty())
+ add(".idata$9", idata.auxIat);
}
void Writer::appendECImportTables() {
@@ -936,6 +938,15 @@ void Writer::appendECImportTables() {
rdataSec->contribSections.insert(rdataSec->contribSections.begin(),
importAddresses);
}
+
+ // The auxiliary IAT is always placed at the end of the .rdata section
+ // and is aligned to 4KB.
+ if (PartialSection *auxIat = findPartialSection(".idata$9", rdata)) {
+ auxIat->chunks.front()->setAlignment(0x1000);
+ rdataSec->chunks.insert(rdataSec->chunks.end(), auxIat->chunks.begin(),
+ auxIat->chunks.end());
+ rdataSec->addContributingPartialSection(auxIat);
+ }
}
// Locate the first Chunk and size of the import directory list and the
@@ -1095,7 +1106,8 @@ void Writer::createSections() {
// ARM64EC has specific placement and alignment requirements for the IAT.
// Delay adding its chunks until appendECImportTables.
- if (isArm64EC(ctx.config.machine) && pSec->name == ".idata$5")
+ if (isArm64EC(ctx.config.machine) &&
+ (pSec->name == ".idata$5" || pSec->name == ".idata$9"))
continue;
OutputSection *sec = createSection(name, outChars);
@@ -2254,6 +2266,11 @@ void Writer::setECSymbols() {
Symbol *entryPointCountSym =
ctx.symtab.findUnderscore("__arm64x_redirection_metadata_count");
cast<DefinedAbsolute>(entryPointCountSym)->setVA(exportThunks.size());
+
+ Symbol *iatSym = ctx.symtab.findUnderscore("__hybrid_auxiliary_iat");
+ replaceSymbol<DefinedSynthetic>(iatSym, "__hybrid_auxiliary_iat",
+ idata.auxIat.empty() ? nullptr
+ : idata.auxIat.front());
}
// Write section contents to a mmap'ed file.
diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
index 75dc610..8d59d29 100644
--- a/lld/test/COFF/Inputs/loadconfig-arm64ec.s
+++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
@@ -76,7 +76,7 @@ __chpe_metadata:
.rva __os_arm64x_check_icall
.rva __os_arm64x_check_icall_cfg
.word 0 // __arm64x_native_entrypoint
- .word 0 // __hybrid_auxiliary_iat
+ .rva __hybrid_auxiliary_iat
.word __x64_code_ranges_to_entry_points_count
.word __arm64x_redirection_metadata_count
.rva __os_arm64x_get_x64_information
diff --git a/lld/test/COFF/arm64ec-import.test b/lld/test/COFF/arm64ec-import.test
index 44a84c0..f8279ce 100644
--- a/lld/test/COFF/arm64ec-import.test
+++ b/lld/test/COFF/arm64ec-import.test
@@ -63,14 +63,37 @@ DISASM-NEXT: 180002000: ff 25 02 10 00 00 jmpq *0x1002(%rip)
RUN: llvm-readobj --hex-dump=.test out.dll | FileCheck --check-prefix=TESTSEC %s
RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC %s
-TESTSEC: 0x180006000 08300000 00300000 10300000 20300000
-TESTSEC-NEXT: 0x180006010 08100000 1c100000 00200000
+TESTSEC: 0x180007000 08500000 00300000 10500000 20500000
+TESTSEC-NEXT: 0x180007010 08300000 00500000 10300000 20300000
+TESTSEC-NEXT: 0x180007020 08100000 1c100000 00200000
RUN: llvm-readobj --headers out.dll | FileCheck -check-prefix=HEADERS %s
HEADERS: LoadConfigTableRVA: 0x4010
HEADERS: IATRVA: 0x3000
HEADERS: IATSize: 0x1000
+RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=LOADCONFIG %s
+LOADCONFIG: AuxiliaryIAT: 0x5000
+
+RUN: llvm-readobj --hex-dump=.rdata out.dll | FileCheck -check-prefix=RDATA %s
+RDATA: 0x180005000 00000000 00000000 08100080 01000000
+RDATA-NEXT: 0x180005010 1c100080 01000000 00000000 00000000
+RDATA-NEXT: 0x180005020 30100080 01000000 00000000 00000000
+
+RUN: llvm-readobj --coff-basereloc out.dll | FileCheck -check-prefix=BASERELOC %s
+BASERELOC: BaseReloc [
+BASERELOC-NOT: Address: 0x5000
+BASERELOC: Address: 0x5008
+BASERELOC-NEXT: }
+BASERELOC-NEXT: Entry {
+BASERELOC-NEXT: Type: DIR64
+BASERELOC-NEXT: Address: 0x5010
+BASERELOC-NEXT: }
+BASERELOC-NEXT: Entry {
+BASERELOC-NEXT: Type: DIR64
+BASERELOC-NEXT: Address: 0x5020
+BASERELOC-NEXT: }
+
#--- test.s
.section .test, "r"
.globl arm64ec_data_sym
@@ -80,6 +103,10 @@ arm64ec_data_sym:
.rva __imp_data
.rva __imp_func2
.rva __imp_t2func
+ .rva __imp_aux_func
+ .rva __imp_aux_data
+ .rva __imp_aux_func2
+ .rva __imp_aux_t2func
.rva __impchk_func
.rva __impchk_func2
.rva func
diff --git a/lldb/include/lldb/Core/SourceManager.h b/lldb/include/lldb/Core/SourceManager.h
index e386271..d929f7b 100644
--- a/lldb/include/lldb/Core/SourceManager.h
+++ b/lldb/include/lldb/Core/SourceManager.h
@@ -74,7 +74,7 @@ public:
const Checksum &GetChecksum() const { return m_checksum; }
- llvm::once_flag &GetChecksumWarningOnceFlag() {
+ std::once_flag &GetChecksumWarningOnceFlag() {
return m_checksum_warning_once_flag;
}
@@ -92,7 +92,7 @@ public:
Checksum m_checksum;
/// Once flag for emitting a checksum mismatch warning.
- llvm::once_flag m_checksum_warning_once_flag;
+ std::once_flag m_checksum_warning_once_flag;
// Keep the modification time that this file data is valid for
llvm::sys::TimePoint<> m_mod_time;
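
Switching the member to std::once_flag keeps the checksum warning a one-shot even under concurrent access, using the standard facility instead of llvm::once_flag. A minimal sketch of the std::call_once pattern; the message text and emission path are illustrative, not SourceManager's:

  #include <iostream>
  #include <mutex>

  std::once_flag checksum_warning_once;

  void maybeWarnChecksumMismatch() {
    // The lambda runs at most once, no matter how many callers race here.
    std::call_once(checksum_warning_once, [] {
      std::cerr << "warning: source file checksum mismatch\n";
    });
  }
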
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 5b9de6f..70540fe 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -2932,14 +2932,22 @@ void DWARFASTParserClang::ParseSingleMember(
last_field_info = this_field_info;
last_field_info.SetIsBitfield(true);
} else {
- last_field_info.bit_offset = field_bit_offset;
+ FieldInfo this_field_info;
+ this_field_info.is_bitfield = false;
+ this_field_info.bit_offset = field_bit_offset;
+ // TODO: we shouldn't silently ignore the bit_size if we fail
+ // to GetByteSize.
if (std::optional<uint64_t> clang_type_size =
member_type->GetByteSize(nullptr)) {
- last_field_info.bit_size = *clang_type_size * character_width;
+ this_field_info.bit_size = *clang_type_size * character_width;
}
- last_field_info.SetIsBitfield(false);
+ if (this_field_info.GetFieldEnd() <= last_field_info.GetEffectiveFieldEnd())
+ this_field_info.SetEffectiveFieldEnd(
+ last_field_info.GetEffectiveFieldEnd());
+
+ last_field_info = this_field_info;
}
// Don't turn artificial members such as vtable pointers into real FieldDecls
@@ -3738,7 +3746,7 @@ void DWARFASTParserClang::AddUnnamedBitfieldToRecordTypeIfNeeded(
const FieldInfo &current_field) {
// TODO: get this value from target
const uint64_t word_width = 32;
- uint64_t last_field_end = previous_field.bit_offset + previous_field.bit_size;
+ uint64_t last_field_end = previous_field.GetEffectiveFieldEnd();
if (!previous_field.IsBitfield()) {
// The last field was not a bit-field...
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index 3809ee9..1ffb09b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -258,9 +258,27 @@ protected:
private:
struct FieldInfo {
+ /// Size in bits that this field occupies. Can but
+ /// need not be the DW_AT_bit_size of the field.
uint64_t bit_size = 0;
+
+ /// Offset of this field in bits from the beginning
+ /// of the containing struct. Can but need not
+ /// be the DW_AT_data_bit_offset of the field.
uint64_t bit_offset = 0;
+
+ /// In case this field is folded into the storage
+ /// of a previous member's storage (for example
+ /// with [[no_unique_address]]), the effective field
+ /// end is the offset in bits from the beginning of
+ /// the containing struct where the field we were
+ /// folded into ended.
+ std::optional<uint64_t> effective_field_end;
+
+ /// Set to 'true' if this field is a bit-field.
bool is_bitfield = false;
+
+ /// Set to 'true' if this field is DW_AT_artificial.
bool is_artificial = false;
FieldInfo() = default;
@@ -276,6 +294,19 @@ private:
// bit offset than any previous bitfield + size.
return (bit_size + bit_offset) <= next_bit_offset;
}
+
+ /// Returns the offset in bits of where the storage this field
+ /// occupies ends.
+ uint64_t GetFieldEnd() const { return bit_size + bit_offset; }
+
+ void SetEffectiveFieldEnd(uint64_t val) { effective_field_end = val; }
+
+ /// If this field was folded into storage of a previous field,
+ /// returns the offset in bits of where that storage ends. Otherwise,
+ /// returns the regular field end (see \ref GetFieldEnd).
+ uint64_t GetEffectiveFieldEnd() const {
+ return effective_field_end.value_or(GetFieldEnd());
+ }
};
/// Parsed form of all attributes that are relevant for parsing type members.
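
The effective field end lets the parser remember how far a previous member's storage actually extended when a later member (for example an overlapping [[no_unique_address]] empty field) starts at an earlier offset, so the unnamed-bitfield heuristic no longer sees a bogus gap. A simplified, self-contained sketch of that bookkeeping (offsets and sizes in bits; the numbers are illustrative):

  #include <cassert>
  #include <cstdint>
  #include <optional>

  struct FieldInfo {
    uint64_t bit_size = 0;
    uint64_t bit_offset = 0;
    // Set when this field is folded into a previous member's storage.
    std::optional<uint64_t> effective_field_end;

    uint64_t GetFieldEnd() const { return bit_size + bit_offset; }
    uint64_t GetEffectiveFieldEnd() const {
      return effective_field_end.value_or(GetFieldEnd());
    }
  };

  int main() {
    // "data" occupies bits [0, 40); an empty [[no_unique_address]] member that
    // overlaps it at offset 0 inherits the effective end of 40, so a bitfield
    // that follows at bit 40 is not treated as preceded by padding.
    FieldInfo data{/*bit_size=*/40, /*bit_offset=*/0, std::nullopt};
    FieldInfo empty{/*bit_size=*/0, /*bit_offset=*/0, std::nullopt};
    if (empty.GetFieldEnd() <= data.GetEffectiveFieldEnd())
      empty.effective_field_end = data.GetEffectiveFieldEnd();
    assert(empty.GetEffectiveFieldEnd() == 40);
    return 0;
  }
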
diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index dfeb765..96520c7 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -265,11 +265,6 @@ if is_configured("lldb_libs_dir"):
if is_configured("lldb_framework_dir"):
dotest_cmd += ["--framework", config.lldb_framework_dir]
-# Facebook T92898286
-if is_configured("llvm_test_bolt"):
- dotest_cmd += ["-E", '"--post-link-optimize"']
-# End Facebook T92898286
-
if (
"lldb-repro-capture" in config.available_features
or "lldb-repro-replay" in config.available_features
diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in
index 602f457..8b2d09a 100644
--- a/lldb/test/API/lit.site.cfg.py.in
+++ b/lldb/test/API/lit.site.cfg.py.in
@@ -1,9 +1,5 @@
@LIT_SITE_CFG_IN_HEADER@
-#Facebook T92898286
-import lit.util
-#End Facebook T92898286
-
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -43,10 +39,6 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@"
config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api")
config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api")
-# Facebook T92898286
-config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
-# End Facebook T92898286
-
# Plugins
lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@'
if lldb_build_intel_pt == '1':
diff --git a/lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp b/lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp
index 1c9cc36..980180e 100644
--- a/lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp
+++ b/lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp
@@ -1,10 +1,10 @@
-// LLDB currently erroneously adds an unnamed bitfield
-// into the AST when an overlapping no_unique_address
-// field precedes a bitfield.
-
// RUN: %clang --target=x86_64-apple-macosx -c -gdwarf -o %t %s
// RUN: %lldb %t \
// RUN: -o "target var global" \
+// RUN: -o "target var global2" \
+// RUN: -o "target var global3" \
+// RUN: -o "target var global4" \
+// RUN: -o "target var global5" \
// RUN: -o "image dump ast" \
// RUN: -o exit | FileCheck %s
@@ -12,12 +12,12 @@
// CHECK: CXXRecordDecl {{.*}} struct Foo definition
// CHECK: |-FieldDecl {{.*}} data 'char[5]'
// CHECK-NEXT: |-FieldDecl {{.*}} padding 'Empty'
-// CHECK-NEXT: |-FieldDecl {{.*}} 'int'
-// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 8
-// CHECK-NEXT: `-FieldDecl {{.*}} sloc> flag 'unsigned long'
+// CHECK-NEXT: `-FieldDecl {{.*}} flag 'unsigned long'
// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1
struct Empty {};
+struct Empty2 {};
+struct Empty3 {};
struct Foo {
char data[5];
@@ -26,3 +26,85 @@ struct Foo {
};
Foo global;
+
+// CHECK: CXXRecordDecl {{.*}} struct ConsecutiveOverlap definition
+// CHECK: |-FieldDecl {{.*}} data 'char[5]'
+// CHECK-NEXT: |-FieldDecl {{.*}} p1 'Empty'
+// CHECK-NEXT: |-FieldDecl {{.*}} p2 'Empty2'
+// CHECK-NEXT: |-FieldDecl {{.*}} p3 'Empty3'
+// CHECK-NEXT: `-FieldDecl {{.*}} flag 'unsigned long'
+// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1
+
+struct ConsecutiveOverlap {
+ char data[5];
+ [[no_unique_address]] Empty p1;
+ [[no_unique_address]] Empty2 p2;
+ [[no_unique_address]] Empty3 p3;
+ unsigned long flag : 1;
+};
+
+ConsecutiveOverlap global2;
+
+// FIXME: we fail to deduce the unnamed bitfields here.
+//
+// CHECK: CXXRecordDecl {{.*}} struct MultipleAtOffsetZero definition
+// CHECK: |-FieldDecl {{.*}} data 'char[5]'
+// CHECK-NEXT: |-FieldDecl {{.*}} p1 'Empty'
+// CHECK-NEXT: |-FieldDecl {{.*}} f1 'unsigned long'
+// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 1
+// CHECK-NEXT: |-FieldDecl {{.*}} p2 'Empty2'
+// CHECK-NEXT: `-FieldDecl {{.*}} f2 'unsigned long'
+// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1
+
+struct MultipleAtOffsetZero {
+ char data[5];
+ [[no_unique_address]] Empty p1;
+ int : 4;
+ unsigned long f1 : 1;
+ [[no_unique_address]] Empty2 p2;
+ int : 4;
+ unsigned long f2 : 1;
+};
+
+MultipleAtOffsetZero global3;
+
+// FIXME: we fail to deduce the unnamed bitfields here.
+//
+// CHECK: CXXRecordDecl {{.*}} struct MultipleEmpty definition
+// CHECK: |-FieldDecl {{.*}} data 'char[5]'
+// CHECK-NEXT: |-FieldDecl {{.*}} p1 'Empty'
+// CHECK-NEXT: |-FieldDecl {{.*}} f1 'unsigned long'
+// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 1
+// CHECK-NEXT: |-FieldDecl {{.*}} p2 'Empty'
+// CHECK-NEXT: `-FieldDecl {{.*}} f2 'unsigned long'
+// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1
+
+struct MultipleEmpty {
+ char data[5];
+ [[no_unique_address]] Empty p1;
+ int : 4;
+ unsigned long f1 : 1;
+ [[no_unique_address]] Empty p2;
+ int : 4;
+ unsigned long f2 : 1;
+};
+
+MultipleEmpty global4;
+
+// CHECK: CXXRecordDecl {{.*}} struct FieldBitfieldOverlap definition
+// CHECK: |-FieldDecl {{.*}} a 'int'
+// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 3
+// CHECK-NEXT: |-FieldDecl {{.*}} p1 'Empty'
+// CHECK-NEXT: |-FieldDecl {{.*}} b 'int'
+// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 6
+// CHECK-NEXT: `-FieldDecl {{.*}} c 'int'
+// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1
+
+struct FieldBitfieldOverlap {
+ int a : 3;
+ [[no_unique_address]] Empty p1;
+ int b : 6;
+ int c : 1;
+};
+
+FieldBitfieldOverlap global5;
diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py
index 7b7be06..255955f 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -165,11 +165,6 @@ def use_support_substitutions(config):
if config.cmake_sysroot:
host_flags += ["--sysroot={}".format(config.cmake_sysroot)]
- # Facebook T92898286
- if config.llvm_test_bolt:
- host_flags += ["--post-link-optimize"]
- # End Facebook T92898286
-
host_flags = " ".join(host_flags)
config.substitutions.append(("%clang_host", "%clang " + host_flags))
config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags))
diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in
index fe83237..b69e7bc 100644
--- a/lldb/test/Shell/lit.site.cfg.py.in
+++ b/lldb/test/Shell/lit.site.cfg.py.in
@@ -1,10 +1,5 @@
@LIT_SITE_CFG_IN_HEADER@
-#Facebook T92898286
-import lit.util
-#End Facebook T92898286
-
-
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -36,10 +31,6 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell")
config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell")
-# Facebook T92898286
-config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
-# End Facebook T92898286
-
import lit.llvm
lit.llvm.initialize(lit_config, config)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index e8f2913f..c637feb 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -711,10 +711,6 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
option(LLVM_USE_SPLIT_DWARF
"Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF)
-# Facebook T92898286
-option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF)
-# End Facebook T92898286
-
# Define an option controlling whether we should build for 32-bit on 64-bit
# platforms, where supported.
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX"))
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index 083d5c9..68498a3 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -471,19 +471,23 @@ protected:
setNumEntries(other.getNumEntries());
setNumTombstones(other.getNumTombstones());
- if (std::is_trivially_copyable<KeyT>::value &&
- std::is_trivially_copyable<ValueT>::value)
- memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(),
- getNumBuckets() * sizeof(BucketT));
- else
- for (size_t i = 0; i < getNumBuckets(); ++i) {
- ::new (&getBuckets()[i].getFirst())
- KeyT(other.getBuckets()[i].getFirst());
- if (!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getEmptyKey()) &&
- !KeyInfoT::isEqual(getBuckets()[i].getFirst(), getTombstoneKey()))
- ::new (&getBuckets()[i].getSecond())
- ValueT(other.getBuckets()[i].getSecond());
+ BucketT *Buckets = getBuckets();
+ const BucketT *OtherBuckets = other.getBuckets();
+ const size_t NumBuckets = getNumBuckets();
+ if constexpr (std::is_trivially_copyable_v<KeyT> &&
+ std::is_trivially_copyable_v<ValueT>) {
+ memcpy(reinterpret_cast<void *>(Buckets), OtherBuckets,
+ NumBuckets * sizeof(BucketT));
+ } else {
+ const KeyT EmptyKey = getEmptyKey();
+ const KeyT TombstoneKey = getTombstoneKey();
+ for (size_t I = 0; I < NumBuckets; ++I) {
+ ::new (&Buckets[I].getFirst()) KeyT(OtherBuckets[I].getFirst());
+ if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(Buckets[I].getFirst(), TombstoneKey))
+ ::new (&Buckets[I].getSecond()) ValueT(OtherBuckets[I].getSecond());
}
+ }
}
static unsigned getHashValue(const KeyT &Val) {
@@ -496,7 +500,7 @@ protected:
}
static const KeyT getEmptyKey() {
- static_assert(std::is_base_of<DenseMapBase, DerivedT>::value,
+ static_assert(std::is_base_of_v<DenseMapBase, DerivedT>,
"Must pass the derived type to this template!");
return KeyInfoT::getEmptyKey();
}
@@ -570,7 +574,7 @@ private:
template <typename KeyArg, typename... ValueArgs>
BucketT *InsertIntoBucket(BucketT *TheBucket, KeyArg &&Key,
ValueArgs &&...Values) {
- TheBucket = InsertIntoBucketImpl(Key, Key, TheBucket);
+ TheBucket = InsertIntoBucketImpl(Key, TheBucket);
TheBucket->getFirst() = std::forward<KeyArg>(Key);
::new (&TheBucket->getSecond()) ValueT(std::forward<ValueArgs>(Values)...);
@@ -580,7 +584,7 @@ private:
template <typename LookupKeyT>
BucketT *InsertIntoBucketWithLookup(BucketT *TheBucket, KeyT &&Key,
ValueT &&Value, LookupKeyT &Lookup) {
- TheBucket = InsertIntoBucketImpl(Key, Lookup, TheBucket);
+ TheBucket = InsertIntoBucketImpl(Lookup, TheBucket);
TheBucket->getFirst() = std::move(Key);
::new (&TheBucket->getSecond()) ValueT(std::move(Value));
@@ -588,8 +592,7 @@ private:
}
template <typename LookupKeyT>
- BucketT *InsertIntoBucketImpl(const KeyT &Key, const LookupKeyT &Lookup,
- BucketT *TheBucket) {
+ BucketT *InsertIntoBucketImpl(const LookupKeyT &Lookup, BucketT *TheBucket) {
incrementEpoch();
// If the load of the hash table is more than 3/4, or if fewer than 1/8 of
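
The copy path now dispatches at compile time: a single memcpy when both key and value are trivially copyable, element-wise placement-new otherwise, with the bucket pointers and count hoisted out of the loop. The same dispatch shape in isolation, with T standing in for the bucket type (a sketch, not the DenseMap code):

  #include <cstddef>
  #include <cstring>
  #include <new>
  #include <type_traits>

  template <typename T>
  void copyBuckets(T *dst, const T *src, std::size_t n) {
    if constexpr (std::is_trivially_copyable_v<T>) {
      // Trivially copyable buckets can be copied wholesale.
      std::memcpy(static_cast<void *>(dst), src, n * sizeof(T));
    } else {
      // Otherwise construct each element in place from its counterpart.
      for (std::size_t i = 0; i < n; ++i)
        ::new (static_cast<void *>(&dst[i])) T(src[i]);
    }
  }
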
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index d3b7ba9..b3e64b2 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -9,7 +9,6 @@
#ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H
#define LLVM_ANALYSIS_CTXPROFANALYSIS_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicInst.h"
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 00ead11..de7e7be 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -119,6 +119,9 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
const DominatorTree *DT = nullptr,
bool UseInstrInfo = true);
+bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
+ const SimplifyQuery &Q);
+
bool isOnlyUsedInZeroComparison(const Instruction *CxtI);
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
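
Exporting the SimplifyQuery-based overload (and, in the ValueTracking.cpp hunks further down, teaching it to consult dominating branch conditions with the predicate inverted on the false edge) means power-of-two facts can come from ordinary control flow, not just llvm.assume. A rough source-level picture of the kind of code that becomes analyzable; whether a given optimization exploits the fact is up to its callers:

  #include <bit>
  #include <cstdint>

  uint64_t clearLowestBit(uint64_t x) {
    if (std::popcount(x) == 1) {
      // ctpop(x) == 1 dominates this block, so x is known to be a power of
      // two here; with that fact, x & (x - 1) is always 0.
      return x & (x - 1);
    }
    // On the false edge the inverted predicate (ctpop(x) != 1) applies.
    return x;
  }
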
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index f1017bd..97c6963 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -87,7 +87,7 @@ def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV
def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
-
def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty]>;
+def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>]>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 766fc0d..a4c0195 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -67,6 +67,7 @@ let TargetPrefix = "spv" in {
def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty]>;
def int_spv_fdot :
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[llvm_anyfloat_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 6605c6f..4352099 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -276,6 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &);
void initializeSelectOptimizePass(PassRegistry &);
void initializeScalarEvolutionWrapperPassPass(PassRegistry &);
void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
+void initializeScalarizerLegacyPassPass(PassRegistry &);
void initializeScavengerTestPass(PassRegistry &);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 1da0215..92b59a6 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -130,6 +130,7 @@ struct ForcePassLinking {
(void)llvm::createLowerAtomicPass();
(void)llvm::createLoadStoreVectorizerPass();
(void)llvm::createPartiallyInlineLibCallsPass();
+ (void)llvm::createScalarizerPass();
(void)llvm::createSeparateConstOffsetFromGEPPass();
(void)llvm::createSpeculativeExecutionPass();
(void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
index e034819..beda07d 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
@@ -15,13 +15,11 @@
#ifndef LLVM_PROFILEDATA_CTXINSTRPROFILEREADER_H
#define LLVM_PROFILEDATA_CTXINSTRPROFILEREADER_H
-#include "llvm/ADT/DenseSet.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/ProfileData/PGOCtxProfWriter.h"
#include "llvm/Support/Error.h"
#include <map>
-#include <vector>
namespace llvm {
/// A node (context) in the loaded contextual profile, suitable for mutation
@@ -34,7 +32,7 @@ namespace llvm {
class PGOCtxProfContext final {
public:
using CallTargetMapTy = std::map<GlobalValue::GUID, PGOCtxProfContext>;
- using CallsiteMapTy = DenseMap<uint32_t, CallTargetMapTy>;
+ using CallsiteMapTy = std::map<uint32_t, CallTargetMapTy>;
private:
friend class PGOCtxProfileReader;
@@ -97,7 +95,16 @@ public:
return Callsites.find(I)->second;
}
- void getContainedGuids(DenseSet<GlobalValue::GUID> &Guids) const;
+ /// Insert this node's GUID as well as the GUIDs of the transitive closure of
+ /// child nodes, into the provided set (technically, all that is required of
+ /// `TSetOfGUIDs` is to have an `insert(GUID)` member)
+ template <class TSetOfGUIDs>
+ void getContainedGuids(TSetOfGUIDs &Guids) const {
+ Guids.insert(GUID);
+ for (const auto &[_, Callsite] : Callsites)
+ for (const auto &[_, Callee] : Callsite)
+ Callee.getContainedGuids(Guids);
+ }
};
class PGOCtxProfileReader final {
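
Making getContainedGuids a template over the set type removes the DenseSet dependency from the header; any container with an insert(GUID) member works. A usage sketch, assuming a loaded PGOCtxProfContext named Ctx is available:

  #include "llvm/ProfileData/PGOCtxProfReader.h"

  #include <cstdint>
  #include <set>
  #include <unordered_set>

  void collectGuids(const llvm::PGOCtxProfContext &Ctx) {
    std::set<uint64_t> Ordered;             // sorted GUIDs
    Ctx.getContainedGuids(Ordered);
    std::unordered_set<uint64_t> Unordered; // hashed GUIDs
    Ctx.getContainedGuids(Unordered);
  }
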
diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h
index 95fe239..5b57d5c 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIR.h
+++ b/llvm/include/llvm/SandboxIR/SandboxIR.h
@@ -125,6 +125,7 @@ class ConstantPointerNull;
class PoisonValue;
class BlockAddress;
class ConstantTokenNone;
+class GlobalValue;
class Context;
class Function;
class Instruction;
@@ -326,6 +327,7 @@ protected:
friend class UndefValue; // For `Val`.
friend class PoisonValue; // For `Val`.
friend class BlockAddress; // For `Val`.
+ friend class GlobalValue; // For `Val`.
/// All values point to the context.
Context &Ctx;
@@ -1115,6 +1117,80 @@ public:
#endif
};
+class GlobalValue : public Constant {
+protected:
+ GlobalValue(ClassID ID, llvm::GlobalValue *C, Context &Ctx)
+ : Constant(ID, C, Ctx) {}
+ friend class Context; // For constructor.
+ Use getOperandUseInternal(unsigned OpIdx, bool Verify) const override {
+ return getOperandUseDefault(OpIdx, Verify);
+ }
+
+public:
+ unsigned getUseOperandNo(const Use &Use) const override {
+ return getUseOperandNoDefault(Use);
+ }
+ /// For isa/dyn_cast.
+ static bool classof(const sandboxir::Value *From) {
+ switch (From->getSubclassID()) {
+ case ClassID::Function:
+ case ClassID::GlobalVariable:
+ case ClassID::GlobalAlias:
+ case ClassID::GlobalIFunc:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ unsigned getAddressSpace() const {
+ return cast<llvm::GlobalValue>(Val)->getAddressSpace();
+ }
+ bool hasGlobalUnnamedAddr() const {
+ return cast<llvm::GlobalValue>(Val)->hasGlobalUnnamedAddr();
+ }
+
+ /// Returns true if this value's address is not significant in this module.
+ /// This attribute is intended to be used only by the code generator and LTO
+ /// to allow the linker to decide whether the global needs to be in the symbol
+ /// table. It should probably not be used in optimizations, as the value may
+ /// have uses outside the module; use hasGlobalUnnamedAddr() instead.
+ bool hasAtLeastLocalUnnamedAddr() const {
+ return cast<llvm::GlobalValue>(Val)->hasAtLeastLocalUnnamedAddr();
+ }
+
+ using UnnamedAddr = llvm::GlobalValue::UnnamedAddr;
+
+ UnnamedAddr getUnnamedAddr() const {
+ return cast<llvm::GlobalValue>(Val)->getUnnamedAddr();
+ }
+ void setUnnamedAddr(UnnamedAddr V);
+
+ static UnnamedAddr getMinUnnamedAddr(UnnamedAddr A, UnnamedAddr B) {
+ return llvm::GlobalValue::getMinUnnamedAddr(A, B);
+ }
+
+ bool hasComdat() const { return cast<llvm::GlobalValue>(Val)->hasComdat(); }
+
+ // TODO: We need a SandboxIR Comdat if we want to implement getComdat().
+ using VisibilityTypes = llvm::GlobalValue::VisibilityTypes;
+ VisibilityTypes getVisibility() const {
+ return cast<llvm::GlobalValue>(Val)->getVisibility();
+ }
+ bool hasDefaultVisibility() const {
+ return cast<llvm::GlobalValue>(Val)->hasDefaultVisibility();
+ }
+ bool hasHiddenVisibility() const {
+ return cast<llvm::GlobalValue>(Val)->hasHiddenVisibility();
+ }
+ bool hasProtectedVisibility() const {
+ return cast<llvm::GlobalValue>(Val)->hasProtectedVisibility();
+ }
+ void setVisibility(VisibilityTypes V);
+
+ // TODO: Add missing functions.
+};
+
class BlockAddress final : public Constant {
BlockAddress(llvm::BlockAddress *C, Context &Ctx)
: Constant(ClassID::BlockAddress, C, Ctx) {}
@@ -3845,8 +3921,9 @@ protected:
friend class PointerType; // For LLVMCtx.
friend class CmpInst; // For LLVMCtx. TODO: cleanup when sandboxir::VectorType
// is complete
- friend class IntegerType; // For LLVMCtx.
- friend class StructType; // For LLVMCtx.
+ friend class IntegerType; // For LLVMCtx.
+ friend class StructType; // For LLVMCtx.
+ friend class TargetExtType; // For LLVMCtx.
Tracker IRTracker;
/// Maps LLVM Value to the corresponding sandboxir::Value. Owns all
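
The new sandboxir::GlobalValue mirrors the common llvm::GlobalValue queries while keeping the wrapped value behind Val. A usage sketch, assuming a sandboxir::Value obtained from a sandboxir::Context; the visibility adjustment is only an example of the forwarded API:

  #include "llvm/SandboxIR/SandboxIR.h"
  #include "llvm/Support/Casting.h"

  void inspectGlobal(llvm::sandboxir::Value *V) {
    using GV = llvm::sandboxir::GlobalValue;
    if (auto *G = llvm::dyn_cast<GV>(V)) {      // classof covers Function,
      (void)G->getAddressSpace();               // GlobalVariable, GlobalAlias,
      if (!G->hasDefaultVisibility())           // and GlobalIFunc
        G->setVisibility(GV::VisibilityTypes::DefaultVisibility);
    }
  }
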
diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def
index bd2f533e..7b72f9b 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def
+++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def
@@ -34,6 +34,9 @@ DEF_CONST(ConstantAggregateZero, ConstantAggregateZero)
DEF_CONST(ConstantPointerNull, ConstantPointerNull)
DEF_CONST(UndefValue, UndefValue)
DEF_CONST(PoisonValue, PoisonValue)
+DEF_CONST(GlobalVariable, GlobalVariable)
+DEF_CONST(GlobalIFunc, GlobalIFunc)
+DEF_CONST(GlobalAlias, GlobalAlias)
DEF_CONST(BlockAddress, BlockAddress)
DEF_CONST(ConstantTokenNone, ConstantTokenNone)
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 6ab63ba..921fe94 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6249,7 +6249,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
/// Return the address space of the associated value. \p NoAddressSpace is
/// returned if the associated value is dead. This functions is not supposed
/// to be called if the AA is invalid.
- virtual int32_t getAddressSpace() const = 0;
+ virtual uint32_t getAddressSpace() const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAAddressSpace &createForPosition(const IRPosition &IRP,
@@ -6268,7 +6268,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
}
// No address space which indicates the associated value is dead.
- static const int32_t NoAddressSpace = -1;
+ static const uint32_t NoAddressSpace = ~0U;
/// Unique ID (due to the unique address)
static const char ID;
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index 87d0d98..68d9ae8 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -450,7 +450,8 @@ public:
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false,
unsigned Depth = 0,
const Instruction *CxtI = nullptr) {
- return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT);
+ return llvm::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
+ SQ.getWithInstruction(CxtI));
}
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
index 45e25cb..4d2a1a2 100644
--- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -24,6 +24,7 @@
namespace llvm {
class Function;
+class FunctionPass;
struct ScalarizerPassOptions {
// These options correspond 1:1 to cl::opt options defined in
@@ -50,6 +51,10 @@ public:
void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; }
void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; }
};
+
+/// Create a legacy pass manager instance of the Scalarizer pass
+FunctionPass *createScalarizerPass(
+ const ScalarizerPassOptions &Options = ScalarizerPassOptions());
}
#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e9fc9bb..ba3ba7c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -265,9 +265,6 @@ bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
});
}
-static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
- const SimplifyQuery &Q);
-
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
bool OrZero, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
@@ -2210,12 +2207,15 @@ static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
/// Return true if we can infer that \p V is known to be a power of 2 from
/// dominating condition \p Cond (e.g., ctpop(V) == 1).
static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
- const Value *Cond) {
+ const Value *Cond,
+ bool CondIsTrue) {
ICmpInst::Predicate Pred;
const APInt *RHSC;
if (!match(Cond, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Specific(V)),
m_APInt(RHSC))))
return false;
+ if (!CondIsTrue)
+ Pred = ICmpInst::getInversePredicate(Pred);
// ctpop(V) u< 2
if (OrZero && Pred == ICmpInst::ICMP_ULT && *RHSC == 2)
return true;
@@ -2227,8 +2227,8 @@ static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
- const SimplifyQuery &Q) {
+bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
+ const SimplifyQuery &Q) {
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
if (isa<Constant>(V))
@@ -2244,12 +2244,32 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
if (!AssumeVH)
continue;
CallInst *I = cast<CallInst>(AssumeVH);
- if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0)) &&
+ if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0),
+ /*CondIsTrue=*/true) &&
isValidAssumeForContext(I, Q.CxtI, Q.DT))
return true;
}
}
+ // Handle dominating conditions.
+ if (Q.DC && Q.CxtI && Q.DT) {
+ for (BranchInst *BI : Q.DC->conditionsFor(V)) {
+ Value *Cond = BI->getCondition();
+
+ BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
+ if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
+ /*CondIsTrue=*/true) &&
+ Q.DT->dominates(Edge0, Q.CxtI->getParent()))
+ return true;
+
+ BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
+ if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
+ /*CondIsTrue=*/false) &&
+ Q.DT->dominates(Edge1, Q.CxtI->getParent()))
+ return true;
+ }
+ }
+
auto *I = dyn_cast<Instruction>(V);
if (!I)
return false;
@@ -9980,8 +10000,7 @@ void llvm::findValuesAffectedByCondition(
}
}
- if (IsAssume && HasRHSC &&
- match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X))))
+ if (HasRHSC && match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X))))
AddAffected(X);
} else if (match(Cond, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
AddCmpOperands(A, B);
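
To make the new dominating-condition handling concrete: ctpop(V) == 1 proves V is a power of two, ctpop(V) u< 2 proves "power of two or zero", and on the false edge of a branch the predicate is inverted first, which is what the added CondIsTrue flag models. A plain C++20 illustration of those facts (std::popcount stands in for ctpop; this is not the LLVM helper itself):

#include <bit>
#include <cassert>
#include <cstdint>

bool isPowerOfTwo(uint64_t V) { return std::popcount(V) == 1; }
bool isPowerOfTwoOrZero(uint64_t V) { return std::popcount(V) < 2; }

int main() {
  assert(isPowerOfTwo(64) && !isPowerOfTwo(0) && !isPowerOfTwo(12));
  assert(isPowerOfTwoOrZero(0) && isPowerOfTwoOrZero(1024) &&
         !isPowerOfTwoOrZero(12));
  // On the false edge of "if (std::popcount(V) != 1)" the inverted predicate
  // is "== 1", so V is known to be a power of two on that path.
  uint64_t V = 32;
  if (std::popcount(V) != 1) {
    assert(false && "not taken for V = 32");
  } else {
    assert(isPowerOfTwo(V));
  }
  return 0;
}
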
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index db33d52..53ce219 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -635,7 +635,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
void
InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
DenseMap<SDValue, Register> &VRBaseMap) {
- unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ Register VReg = getVR(Node->getOperand(0), VRBaseMap);
// Create the new VReg in the destination class and emit a copy.
unsigned DstRCIdx = Node->getConstantOperandVal(1);
@@ -678,7 +678,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
// insert copies for them in TwoAddressInstructionPass anyway.
if (!R || !R->getReg().isPhysical()) {
unsigned SubIdx = Op->getAsZExtVal();
- unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ Register SubReg = getVR(Node->getOperand(i - 1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
@@ -1274,7 +1274,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
case ISD::CopyFromReg: {
- unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ Register SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap);
break;
}
@@ -1343,7 +1343,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
SmallVector<unsigned, 8> GroupIdx;
// Remember registers that are part of early-clobber defs.
- SmallVector<unsigned, 8> ECRegs;
+ SmallVector<Register, 8> ECRegs;
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
@@ -1424,7 +1424,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// used), but this does not match the semantics of our early-clobber flag.
// If an early-clobber operand register is also an input operand register,
// then remove the early-clobber flag.
- for (unsigned Reg : ECRegs) {
+ for (Register Reg : ECRegs) {
if (MIB->readsRegister(Reg, TRI)) {
MachineOperand *MO =
MIB->findRegisterDefOperand(Reg, TRI, false, false);
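
The InstrEmitter hunks above are a mechanical unsigned-to-Register migration; llvm::Register is essentially a typed wrapper over the raw register id. A simplified sketch of why such a wrapper can replace unsigned without churn at call sites (illustrative, not the real class):

#include <cassert>

class RegisterSketch {
  unsigned Reg = 0;
public:
  constexpr RegisterSketch() = default;
  constexpr RegisterSketch(unsigned R) : Reg(R) {}
  // Implicit conversion keeps existing unsigned-based APIs working.
  constexpr operator unsigned() const { return Reg; }
  constexpr bool isValid() const { return Reg != 0; }
};

unsigned legacyApi(unsigned R) { return R + 1; } // unchanged consumer

int main() {
  RegisterSketch R(41);
  assert(legacyApi(R) == 42); // converts back to unsigned transparently
  assert(R.isValid());
  return 0;
}
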
diff --git a/llvm/lib/ProfileData/PGOCtxProfReader.cpp b/llvm/lib/ProfileData/PGOCtxProfReader.cpp
index 8354e30..496854e 100644
--- a/llvm/lib/ProfileData/PGOCtxProfReader.cpp
+++ b/llvm/lib/ProfileData/PGOCtxProfReader.cpp
@@ -44,14 +44,6 @@ PGOCtxProfContext::getOrEmplace(uint32_t Index, GlobalValue::GUID G,
return Iter->second;
}
-void PGOCtxProfContext::getContainedGuids(
- DenseSet<GlobalValue::GUID> &Guids) const {
- Guids.insert(GUID);
- for (const auto &[_, Callsite] : Callsites)
- for (const auto &[_, Callee] : Callsite)
- Callee.getContainedGuids(Guids);
-}
-
Expected<BitstreamEntry> PGOCtxProfileReader::advance() {
return Cursor.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
}
diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp
index 05d05f7..8a7c398 100644
--- a/llvm/lib/SandboxIR/SandboxIR.cpp
+++ b/llvm/lib/SandboxIR/SandboxIR.cpp
@@ -2495,6 +2495,20 @@ PoisonValue *PoisonValue::getElementValue(unsigned Idx) const {
cast<llvm::PoisonValue>(Val)->getElementValue(Idx)));
}
+void GlobalValue::setUnnamedAddr(UnnamedAddr V) {
+ Ctx.getTracker()
+ .emplaceIfTracking<GenericSetter<&GlobalValue::getUnnamedAddr,
+ &GlobalValue::setUnnamedAddr>>(this);
+ cast<llvm::GlobalValue>(Val)->setUnnamedAddr(V);
+}
+
+void GlobalValue::setVisibility(VisibilityTypes V) {
+ Ctx.getTracker()
+ .emplaceIfTracking<GenericSetter<&GlobalValue::getVisibility,
+ &GlobalValue::setVisibility>>(this);
+ cast<llvm::GlobalValue>(Val)->setVisibility(V);
+}
+
BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
auto *LLVMC = llvm::BlockAddress::get(cast<llvm::Function>(F->Val),
cast<llvm::BasicBlock>(BB->Val));
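
The two setters above follow the SandboxIR pattern of registering a reversible change with the tracker before mutating the underlying llvm::GlobalValue. A standalone C++ sketch of that record-then-mutate idea (the revert-lambda mechanism here is an assumption for illustration, not the actual GenericSetter/Tracker implementation):

#include <cassert>
#include <functional>
#include <vector>

struct TrackerSketch {
  std::vector<std::function<void()>> RevertActions;
  void revertAll() {
    // Undo in reverse order of recording.
    for (auto It = RevertActions.rbegin(); It != RevertActions.rend(); ++It)
      (*It)();
    RevertActions.clear();
  }
};

struct GlobalSketch {
  int Visibility = 0;
  void setVisibility(int V, TrackerSketch &T) {
    int Old = Visibility;
    T.RevertActions.push_back([this, Old] { Visibility = Old; }); // record old value
    Visibility = V;                                               // then mutate
  }
};

int main() {
  TrackerSketch T;
  GlobalSketch G;
  G.setVisibility(2, T);
  assert(G.Visibility == 2);
  T.revertAll();
  assert(G.Visibility == 0);
  return 0;
}
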
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index ffeec31b..687a733 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1065,19 +1065,16 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
Attributor A(Functions, InfoCache, AC);
- for (Function &F : M) {
- if (F.isIntrinsic())
- continue;
-
- A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
- A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
- A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));
- CallingConv::ID CC = F.getCallingConv();
+ for (auto *F : Functions) {
+ A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
+ A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
+ A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
+ CallingConv::ID CC = F->getCallingConv();
if (!AMDGPU::isEntryFunctionCC(CC)) {
- A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
- A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
+ A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
+ A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
} else if (CC == CallingConv::AMDGPU_KERNEL) {
- addPreloadKernArgHint(F, TM);
+ addPreloadKernArgHint(*F, TM);
}
for (auto &I : instructions(F)) {
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index aa77698..839060b 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DXILMetadataAnalysis.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/BinaryFormat/DXContainer.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
@@ -40,6 +41,7 @@ class DXContainerGlobals : public llvm::ModulePass {
GlobalVariable *buildSignature(Module &M, Signature &Sig, StringRef Name,
StringRef SectionName);
void addSignature(Module &M, SmallVector<GlobalValue *> &Globals);
+ void addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV);
void addPipelineStateValidationInfo(Module &M,
SmallVector<GlobalValue *> &Globals);
@@ -59,6 +61,7 @@ public:
AU.setPreservesAll();
AU.addRequired<ShaderFlagsAnalysisWrapper>();
AU.addRequired<DXILMetadataAnalysisWrapperPass>();
+ AU.addRequired<DXILResourceWrapperPass>();
}
};
@@ -140,6 +143,56 @@ void DXContainerGlobals::addSignature(Module &M,
Globals.emplace_back(buildSignature(M, OutputSig, "dx.osg1", "OSG1"));
}
+void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {
+ const DXILResourceMap &ResMap =
+ getAnalysis<DXILResourceWrapperPass>().getResourceMap();
+
+ for (const dxil::ResourceInfo &ResInfo : ResMap) {
+ const dxil::ResourceInfo::ResourceBinding &Binding = ResInfo.getBinding();
+ dxbc::PSV::v2::ResourceBindInfo BindInfo;
+ BindInfo.LowerBound = Binding.LowerBound;
+ BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1;
+ BindInfo.Space = Binding.Space;
+
+ dxbc::PSV::ResourceType ResType = dxbc::PSV::ResourceType::Invalid;
+ bool IsUAV = ResInfo.getResourceClass() == dxil::ResourceClass::UAV;
+ switch (ResInfo.getResourceKind()) {
+ case dxil::ResourceKind::Sampler:
+ ResType = dxbc::PSV::ResourceType::Sampler;
+ break;
+ case dxil::ResourceKind::CBuffer:
+ ResType = dxbc::PSV::ResourceType::CBV;
+ break;
+ case dxil::ResourceKind::StructuredBuffer:
+ ResType = IsUAV ? dxbc::PSV::ResourceType::UAVStructured
+ : dxbc::PSV::ResourceType::SRVStructured;
+ if (IsUAV && ResInfo.getUAV().HasCounter)
+ ResType = dxbc::PSV::ResourceType::UAVStructuredWithCounter;
+ break;
+ case dxil::ResourceKind::RTAccelerationStructure:
+ ResType = dxbc::PSV::ResourceType::SRVRaw;
+ break;
+ case dxil::ResourceKind::RawBuffer:
+ ResType = IsUAV ? dxbc::PSV::ResourceType::UAVRaw
+ : dxbc::PSV::ResourceType::SRVRaw;
+ break;
+ default:
+ ResType = IsUAV ? dxbc::PSV::ResourceType::UAVTyped
+ : dxbc::PSV::ResourceType::SRVTyped;
+ break;
+ }
+ BindInfo.Type = ResType;
+
+ BindInfo.Kind =
+ static_cast<dxbc::PSV::ResourceKind>(ResInfo.getResourceKind());
+ // TODO: Add support for dxbc::PSV::ResourceFlag::UsedByAtomic64, tracking
+ // with https://github.com/llvm/llvm-project/issues/104392
+ BindInfo.Flags.Flags = 0u;
+
+ PSV.Resources.emplace_back(BindInfo);
+ }
+}
+
void DXContainerGlobals::addPipelineStateValidationInfo(
Module &M, SmallVector<GlobalValue *> &Globals) {
SmallString<256> Data;
@@ -155,6 +208,8 @@ void DXContainerGlobals::addPipelineStateValidationInfo(
PSV.BaseData.ShaderStage =
static_cast<uint8_t>(MMI.ShaderStage - Triple::Pixel);
+ addResourcesForPSV(M, PSV);
+
// Hardcoded values here to unblock loading the shader into D3D.
//
// TODO: Lots more stuff to do here!
@@ -185,6 +240,7 @@ INITIALIZE_PASS_BEGIN(DXContainerGlobals, "dxil-globals",
"DXContainer Global Emitter", false, true)
INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper)
INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass)
INITIALIZE_PASS_END(DXContainerGlobals, "dxil-globals",
"DXContainer Global Emitter", false, true)
diff --git a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp
index c02eb768..d315d9b 100644
--- a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp
+++ b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp
@@ -8,6 +8,7 @@
#include "DXILFinalizeLinkage.h"
#include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
@@ -48,6 +49,10 @@ bool DXILFinalizeLinkageLegacy::runOnModule(Module &M) {
return finalizeLinkage(M);
}
+void DXILFinalizeLinkageLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DXILResourceWrapperPass>();
+}
+
char DXILFinalizeLinkageLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(DXILFinalizeLinkageLegacy, DEBUG_TYPE,
diff --git a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.h b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.h
index aab1bc3..62d3a8a 100644
--- a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.h
+++ b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.h
@@ -32,6 +32,7 @@ public:
DXILFinalizeLinkageLegacy() : ModulePass(ID) {}
bool runOnModule(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
static char ID; // Pass identification.
};
} // namespace llvm
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 72fa989..dd73b895 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -14,6 +14,7 @@
#include "DirectX.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -50,6 +51,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
case Intrinsic::dx_sign:
+ case Intrinsic::dx_step:
return true;
}
return false;
@@ -322,6 +324,28 @@ static Value *expandPowIntrinsic(CallInst *Orig) {
return Exp2Call;
}
+static Value *expandStepIntrinsic(CallInst *Orig) {
+
+ Value *X = Orig->getOperand(0);
+ Value *Y = Orig->getOperand(1);
+ Type *Ty = X->getType();
+ IRBuilder<> Builder(Orig);
+
+ Constant *One = ConstantFP::get(Ty->getScalarType(), 1.0);
+ Constant *Zero = ConstantFP::get(Ty->getScalarType(), 0.0);
+ Value *Cond = Builder.CreateFCmpOLT(Y, X);
+
+ if (Ty != Ty->getScalarType()) {
+ auto *XVec = dyn_cast<FixedVectorType>(Ty);
+ One = ConstantVector::getSplat(
+ ElementCount::getFixed(XVec->getNumElements()), One);
+ Zero = ConstantVector::getSplat(
+ ElementCount::getFixed(XVec->getNumElements()), Zero);
+ }
+
+ return Builder.CreateSelect(Cond, Zero, One);
+}
+
static Intrinsic::ID getMaxForClamp(Type *ElemTy,
Intrinsic::ID ClampIntrinsic) {
if (ClampIntrinsic == Intrinsic::dx_uclamp)
@@ -433,8 +457,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_sign:
Result = expandSignIntrinsic(Orig);
break;
+ case Intrinsic::dx_step:
+ Result = expandStepIntrinsic(Orig);
}
-
if (Result) {
Orig->replaceAllUsesWith(Result);
Orig->eraseFromParent();
@@ -471,6 +496,10 @@ bool DXILIntrinsicExpansionLegacy::runOnModule(Module &M) {
return expansionIntrinsics(M);
}
+void DXILIntrinsicExpansionLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DXILResourceWrapperPass>();
+}
+
char DXILIntrinsicExpansionLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(DXILIntrinsicExpansionLegacy, DEBUG_TYPE,
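
For reference, the scalar semantics the expandStepIntrinsic hunk implements: HLSL step(x, y) yields 1.0 when y >= x and 0.0 otherwise, which the expansion encodes as select(fcmp olt y, x; 0.0, 1.0). A plain C++ model of that behavior (illustrative only, not the DXIL lowering itself):

#include <cassert>

float stepScalar(float X, float Y) {
  return (Y < X) ? 0.0f : 1.0f; // mirrors select(Y < X, 0.0, 1.0)
}

int main() {
  assert(stepScalar(0.5f, 1.0f) == 1.0f); // Y >= X -> 1.0
  assert(stepScalar(1.0f, 0.25f) == 0.0f); // Y < X -> 0.0
  return 0;
}
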
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
index c86681a..c8ee4b1 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
@@ -26,6 +26,7 @@ public:
bool runOnModule(Module &M) override;
DXILIntrinsicExpansionLegacy() : ModulePass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
static char ID; // Pass identification.
};
} // namespace llvm
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index f6b7355..b050240 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/DXILMetadataAnalysis.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/IRBuilder.h"
@@ -249,6 +250,7 @@ public:
AU.addPreserved<ShaderFlagsAnalysisWrapper>();
AU.addPreserved<DXILResourceMDWrapper>();
AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
+ AU.addPreserved<DXILResourceWrapperPass>();
}
static char ID; // Pass identification.
};
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index a29fc21..606022a9 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSectionDXContainer.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/MC/TargetRegistry.h"
@@ -36,6 +37,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <optional>
using namespace llvm;
@@ -44,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
auto *PR = PassRegistry::getPassRegistry();
initializeDXILIntrinsicExpansionLegacyPass(*PR);
+ initializeScalarizerLegacyPassPass(*PR);
initializeDXILPrepareModulePass(*PR);
initializeEmbedDXILPassPass(*PR);
initializeWriteDXILPassPass(*PR);
@@ -83,6 +86,9 @@ public:
FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
void addCodeGenPrepare() override {
addPass(createDXILIntrinsicExpansionLegacyPass());
+ ScalarizerPassOptions DxilScalarOptions;
+ DxilScalarOptions.ScalarizeLoadStore = true;
+ addPass(createScalarizerPass(DxilScalarOptions));
addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9723789..6177884 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -337,6 +337,8 @@ private:
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVectorImpl<CCValAssign> &ArgLocs) const;
+
+ bool softPromoteHalfType() const override { return true; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 3692384..bf47361 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -24,9 +24,9 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXUtilities.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -55,8 +55,8 @@ public:
char NVPTXLowerAlloca::ID = 1;
-INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
- "Lower Alloca", false, false)
+INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", "Lower Alloca", false,
+ false)
// =============================================================================
// Main function for this pass.
@@ -70,14 +70,38 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
for (auto &I : BB) {
if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
Changed = true;
+
+ PointerType *AllocInstPtrTy =
+ cast<PointerType>(allocaInst->getType()->getScalarType());
+ unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
+ assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
+ AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
+ "AllocaInst can only be in Generic or Local address space for "
+ "NVPTX.");
+
+ Instruction *AllocaInLocalAS = allocaInst;
auto ETy = allocaInst->getAllocatedType();
- auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
- auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
- auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
- auto NewASCToGeneric =
- new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, "");
- NewASCToLocal->insertAfter(allocaInst);
- NewASCToGeneric->insertAfter(NewASCToLocal);
+
+ // We need to make sure that LLVM knows that the alloca needs to go to
+ // ADDRESS_SPACE_LOCAL for the InferAddressSpace pass.
+ //
+ // For allocas in ADDRESS_SPACE_GENERIC, we add an addrspacecast to
+ // ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
+ // the alloca's users still use a generic pointer to operate on.
+ //
+ // For allocas already in ADDRESS_SPACE_LOCAL, we just need
+ // addrspacecast to ADDRESS_SPACE_GENERIC.
+ if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
+ auto ASCastToLocalAS = new AddrSpaceCastInst(
+ allocaInst, PointerType::get(ETy, ADDRESS_SPACE_LOCAL), "");
+ ASCastToLocalAS->insertAfter(allocaInst);
+ AllocaInLocalAS = ASCastToLocalAS;
+ }
+
+ auto AllocaInGenericAS = new AddrSpaceCastInst(
+ AllocaInLocalAS, PointerType::get(ETy, ADDRESS_SPACE_GENERIC), "");
+ AllocaInGenericAS->insertAfter(AllocaInLocalAS);
+
for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
@@ -87,23 +111,23 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
if (LI && LI->getPointerOperand() == allocaInst &&
!LI->isVolatile()) {
- LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
+ LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
if (SI && SI->getPointerOperand() == allocaInst &&
!SI->isVolatile()) {
- SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
+ SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
if (GI && GI->getPointerOperand() == allocaInst) {
- GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
+ GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
if (BI && BI->getOperand(0) == allocaInst) {
- BI->setOperand(0, NewASCToGeneric);
+ BI->setOperand(0, AllocaInGenericAS);
continue;
}
}
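
Restating the comment block above as a decision rule: an alloca already in the generic address space gets a cast to local and a cast back to generic, while an alloca already in the local address space only needs the single cast back to generic for its users. A toy C++ sketch of that decision (the enum and names are made up, not the NVPTX API):

#include <cassert>
#include <string>
#include <vector>

enum AddrSpaceSketch { GenericAS, LocalAS };

std::vector<std::string> castsForAlloca(AddrSpaceSketch AllocaAS) {
  std::vector<std::string> Casts;
  if (AllocaAS == GenericAS)
    Casts.push_back("addrspacecast generic -> local"); // only for generic allocas
  Casts.push_back("addrspacecast local -> generic");   // always, for the users
  return Casts;
}

int main() {
  assert(castsForAlloca(GenericAS).size() == 2);
  assert(castsForAlloca(LocalAS).size() == 1);
  return 0;
}
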
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index fa141c3..c802274 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -348,6 +348,11 @@ def FeatureStdExtZfinx
def HasStdExtZfinx : Predicate<"Subtarget->hasStdExtZfinx()">,
AssemblerPredicate<(all_of FeatureStdExtZfinx),
"'Zfinx' (Float in Integer)">;
+def HasStdExtFOrZfinx : Predicate<"Subtarget->hasStdExtFOrZfinx()">,
+ AssemblerPredicate<(any_of FeatureStdExtF,
+ FeatureStdExtZfinx),
+ "'F' (Single-Precision Floating-Point) or "
+ "'Zfinx' (Float in Integer)">;
def FeatureStdExtZdinx
: RISCVExtension<"zdinx", 1, 0,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 6d0952a..fe5623e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -863,8 +863,6 @@ def DRET : Priv<"dret", 0b0111101>, Sched<[]> {
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
-def : InstAlias<"nop", (ADDI X0, X0, 0)>;
-
// Note that the size is 32 because up to 8 32-bit instructions are needed to
// generate an arbitrary 64-bit immediate. However, the size does not really
// matter since PseudoLI is currently only used in the AsmParser where it gets
@@ -890,8 +888,10 @@ def PseudoLD : PseudoLoad<"ld">;
def PseudoSD : PseudoStore<"sd">;
} // Predicates = [IsRV64]
-def : InstAlias<"li $rd, $imm", (ADDI GPR:$rd, X0, simm12:$imm)>;
-def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>;
+def : InstAlias<"nop", (ADDI X0, X0, 0), 3>;
+def : InstAlias<"li $rd, $imm", (ADDI GPR:$rd, X0, simm12:$imm), 2>;
+def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>;
+
def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>;
def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>;
@@ -961,14 +961,14 @@ def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw
let Predicates = [HasStdExtZihintpause] in
def : InstAlias<"pause", (FENCE 0x1, 0x0)>; // 0x1 == w
-def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0)>;
-def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, CYCLE.Encoding, X0)>;
-def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, TIME.Encoding, X0)>;
+def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0), 2>;
+def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, CYCLE.Encoding, X0), 2>;
+def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, TIME.Encoding, X0), 2>;
let Predicates = [IsRV32] in {
-def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, INSTRETH.Encoding, X0)>;
-def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, CYCLEH.Encoding, X0)>;
-def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, TIMEH.Encoding, X0)>;
+def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, INSTRETH.Encoding, X0), 2>;
+def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, CYCLEH.Encoding, X0), 2>;
+def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, TIMEH.Encoding, X0), 2>;
} // Predicates = [IsRV32]
def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, csr_sysreg:$csr, X0)>;
@@ -990,13 +990,13 @@ def : InstAlias<"csrrs $rd, $csr, $imm", (CSRRSI GPR:$rd, csr_sysreg:$csr, uimm5
def : InstAlias<"csrrc $rd, $csr, $imm", (CSRRCI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
}
-def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>;
+def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0), 2>;
def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
-def : InstAlias<"hfence.gvma", (HFENCE_GVMA X0, X0)>;
+def : InstAlias<"hfence.gvma", (HFENCE_GVMA X0, X0), 2>;
def : InstAlias<"hfence.gvma $rs", (HFENCE_GVMA GPR:$rs, X0)>;
-def : InstAlias<"hfence.vvma", (HFENCE_VVMA X0, X0)>;
+def : InstAlias<"hfence.vvma", (HFENCE_VVMA X0, X0), 2>;
def : InstAlias<"hfence.vvma $rs", (HFENCE_VVMA GPR:$rs, X0)>;
let Predicates = [HasStdExtZihintntl] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 1442bc1..a00acb3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -400,23 +400,10 @@ def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">,
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtF] in {
-def : InstAlias<"flw $rd, (${rs1})", (FLW FPR32:$rd, GPR:$rs1, 0), 0>;
-def : InstAlias<"fsw $rs2, (${rs1})", (FSW FPR32:$rs2, GPR:$rs1, 0), 0>;
-
-def : InstAlias<"fmv.s $rd, $rs", (FSGNJ_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
-def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
-def : InstAlias<"fneg.s $rd, $rs", (FSGNJN_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
-
-// fgt.s/fge.s are recognised by the GNU assembler but the canonical
-// flt.s/fle.s forms will always be printed. Therefore, set a zero weight.
-def : InstAlias<"fgt.s $rd, $rs, $rt",
- (FLT_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>;
-def : InstAlias<"fge.s $rd, $rs, $rt",
- (FLE_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>;
-
+let Predicates = [HasStdExtFOrZfinx] in {
// The following csr instructions actually alias instructions from the base ISA.
-// However, it only makes sense to support them when the F extension is enabled.
+// However, it only makes sense to support them when the F or Zfinx extension is
+// enabled.
// NOTE: "frcsr", "frrm", and "frflags" are more specialized version of "csrr".
def : InstAlias<"frcsr $rd", (CSRRS GPR:$rd, SysRegFCSR.Encoding, X0), 2>;
def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, SysRegFCSR.Encoding, GPR:$rs)>;
@@ -439,6 +426,22 @@ def : InstAlias<"fsflags $rd, $rs", (CSRRW GPR:$rd, SysRegFFLAGS.Encoding, GP
def : InstAlias<"fsflags $rs", (CSRRW X0, SysRegFFLAGS.Encoding, GPR:$rs), 2>;
def : InstAlias<"fsflagsi $rd, $imm", (CSRRWI GPR:$rd, SysRegFFLAGS.Encoding, uimm5:$imm)>;
def : InstAlias<"fsflagsi $imm", (CSRRWI X0, SysRegFFLAGS.Encoding, uimm5:$imm), 2>;
+} // Predicates = [HasStdExtFOrZfinx]
+
+let Predicates = [HasStdExtF] in {
+def : InstAlias<"flw $rd, (${rs1})", (FLW FPR32:$rd, GPR:$rs1, 0), 0>;
+def : InstAlias<"fsw $rs2, (${rs1})", (FSW FPR32:$rs2, GPR:$rs1, 0), 0>;
+
+def : InstAlias<"fmv.s $rd, $rs", (FSGNJ_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
+def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
+def : InstAlias<"fneg.s $rd, $rs", (FSGNJN_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
+
+// fgt.s/fge.s are recognised by the GNU assembler but the canonical
+// flt.s/fle.s forms will always be printed. Therefore, set a zero weight.
+def : InstAlias<"fgt.s $rd, $rs, $rt",
+ (FLT_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>;
+def : InstAlias<"fge.s $rd, $rs, $rt",
+ (FLE_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>;
// fmv.w.x and fmv.x.w were previously known as fmv.s.x and fmv.x.s. Both
// spellings should be supported by standard tools.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index 3bd6da2..b586b10 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -203,9 +203,9 @@ let Predicates = [HasVendorXCValu, IsRV32],
// General ALU Operations
def CV_ABS : CVInstAluR<0b0101000, 0b011, "cv.abs">,
Sched<[]>;
- def CV_SLET : CVInstAluRR<0b0101001, 0b011, "cv.slet">,
+ def CV_SLE : CVInstAluRR<0b0101001, 0b011, "cv.sle">,
Sched<[]>;
- def CV_SLETU : CVInstAluRR<0b0101010, 0b011, "cv.sletu">,
+ def CV_SLEU : CVInstAluRR<0b0101010, 0b011, "cv.sleu">,
Sched<[]>;
def CV_MIN : CVInstAluRR<0b0101011, 0b011, "cv.min">,
Sched<[]>;
@@ -276,6 +276,10 @@ let Predicates = [HasVendorXCValu, IsRV32],
// hasSideEffects = 0, mayLoad = 0, mayStore = 0,
// Constraints = "$rd = $rd_wb"
+let Predicates = [HasVendorXCValu, IsRV32] in {
+ def : MnemonicAlias<"cv.slet", "cv.sle">;
+ def : MnemonicAlias<"cv.sletu", "cv.sleu">;
+}
class CVInstSIMDRR<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
RISCVOpcode opcode, dag outs,
@@ -778,8 +782,8 @@ multiclass PatCoreVAluGprGprImm<Intrinsic intr> {
let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in {
def : PatGpr<abs, CV_ABS>;
- def : PatGprGpr<setle, CV_SLET>;
- def : PatGprGpr<setule, CV_SLETU>;
+ def : PatGprGpr<setle, CV_SLE>;
+ def : PatGprGpr<setule, CV_SLEU>;
def : PatGprGpr<smin, CV_MIN>;
def : PatGprGpr<umin, CV_MINU>;
def : PatGprGpr<smax, CV_MAX>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 831d7f7..b526c9f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -263,6 +263,9 @@ private:
bool selectSpvThreadId(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
+ bool selectStep(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
bool selectUnmergeValues(MachineInstr &I) const;
Register buildI32Constant(uint32_t Val, MachineInstr &I,
@@ -1710,6 +1713,25 @@ bool SPIRVInstructionSelector::selectSign(Register ResVReg,
return Result;
}
+bool SPIRVInstructionSelector::selectStep(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+
+ assert(I.getNumOperands() == 4);
+ assert(I.getOperand(2).isReg());
+ assert(I.getOperand(3).isReg());
+ MachineBasicBlock &BB = *I.getParent();
+
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+ .addImm(GL::Step)
+ .addUse(I.getOperand(2).getReg())
+ .addUse(I.getOperand(3).getReg())
+ .constrainAllUses(TII, TRI, RBI);
+}
+
bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
@@ -2468,6 +2490,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
.addUse(GR.getSPIRVTypeID(ResType))
.addUse(GR.getOrCreateConstInt(3, I, IntTy, TII));
}
+ case Intrinsic::spv_step:
+ return selectStep(ResVReg, ResType, I);
default: {
std::string DiagMsg;
raw_string_ostream OS(DiagMsg);
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 420c429..6181ee8 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1855,6 +1855,14 @@ def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[
// NOTE: PXORrr,PANDNrr are not zero-cycle!
def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;
+// TODO: This should be extended to incorporate all of the AVX512 zeroing
+// idioms that can be executed by the renamer.
+def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicZ]>
+]>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr)>;
+
def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogicY]>
@@ -1930,6 +1938,12 @@ def : IsZeroIdiomFunction<[
VPSUBUSBYrr, VPSUBUSWYrr,
VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
], ZeroIdiomPredicate>,
+
+ // AVX512 ZMM Zero-idioms.
+ // TODO: This should be expanded to incorporate all AVX512 zeroing idioms.
+ DepBreakingClass<[
+ VPXORDZrr
+ ], ZeroIdiomPredicate>,
]>;
def : IsDepBreakingFunction<[
diff --git a/llvm/lib/Transforms/Coroutines/CMakeLists.txt b/llvm/lib/Transforms/Coroutines/CMakeLists.txt
index c650817..46ef5cd 100644
--- a/llvm/lib/Transforms/Coroutines/CMakeLists.txt
+++ b/llvm/lib/Transforms/Coroutines/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMCoroutines
CoroSplit.cpp
SuspendCrossingInfo.cpp
SpillUtils.cpp
+ MaterializationUtils.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Coroutines
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 8ee4bfa..c08f56b 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -16,10 +16,10 @@
//===----------------------------------------------------------------------===//
#include "CoroInternal.h"
+#include "MaterializationUtils.h"
#include "SpillUtils.h"
#include "SuspendCrossingInfo.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/StackLifetime.h"
@@ -36,135 +36,12 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
-#include <deque>
#include <optional>
using namespace llvm;
extern cl::opt<bool> UseNewDbgInfoFormat;
-// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
-// "coro-frame", which results in leaner debug spew.
-#define DEBUG_TYPE "coro-suspend-crossing"
-
-namespace {
-
-// RematGraph is used to construct a DAG for rematerializable instructions
-// When the constructor is invoked with a candidate instruction (which is
-// materializable) it builds a DAG of materializable instructions from that
-// point.
-// Typically, for each instruction identified as re-materializable across a
-// suspend point, a RematGraph will be created.
-struct RematGraph {
- // Each RematNode in the graph contains the edges to instructions providing
- // operands in the current node.
- struct RematNode {
- Instruction *Node;
- SmallVector<RematNode *> Operands;
- RematNode() = default;
- RematNode(Instruction *V) : Node(V) {}
- };
-
- RematNode *EntryNode;
- using RematNodeMap =
- SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
- RematNodeMap Remats;
- const std::function<bool(Instruction &)> &MaterializableCallback;
- SuspendCrossingInfo &Checker;
-
- RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
- Instruction *I, SuspendCrossingInfo &Checker)
- : MaterializableCallback(MaterializableCallback), Checker(Checker) {
- std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I);
- EntryNode = FirstNode.get();
- std::deque<std::unique_ptr<RematNode>> WorkList;
- addNode(std::move(FirstNode), WorkList, cast<User>(I));
- while (WorkList.size()) {
- std::unique_ptr<RematNode> N = std::move(WorkList.front());
- WorkList.pop_front();
- addNode(std::move(N), WorkList, cast<User>(I));
- }
- }
-
- void addNode(std::unique_ptr<RematNode> NUPtr,
- std::deque<std::unique_ptr<RematNode>> &WorkList,
- User *FirstUse) {
- RematNode *N = NUPtr.get();
- if (Remats.count(N->Node))
- return;
-
- // We haven't see this node yet - add to the list
- Remats[N->Node] = std::move(NUPtr);
- for (auto &Def : N->Node->operands()) {
- Instruction *D = dyn_cast<Instruction>(Def.get());
- if (!D || !MaterializableCallback(*D) ||
- !Checker.isDefinitionAcrossSuspend(*D, FirstUse))
- continue;
-
- if (Remats.count(D)) {
- // Already have this in the graph
- N->Operands.push_back(Remats[D].get());
- continue;
- }
-
- bool NoMatch = true;
- for (auto &I : WorkList) {
- if (I->Node == D) {
- NoMatch = false;
- N->Operands.push_back(I.get());
- break;
- }
- }
- if (NoMatch) {
- // Create a new node
- std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D);
- N->Operands.push_back(ChildNode.get());
- WorkList.push_back(std::move(ChildNode));
- }
- }
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- static std::string getBasicBlockLabel(const BasicBlock *BB) {
- if (BB->hasName())
- return BB->getName().str();
-
- std::string S;
- raw_string_ostream OS(S);
- BB->printAsOperand(OS, false);
- return OS.str().substr(1);
- }
-
- void dump() const {
- dbgs() << "Entry (";
- dbgs() << getBasicBlockLabel(EntryNode->Node->getParent());
- dbgs() << ") : " << *EntryNode->Node << "\n";
- for (auto &E : Remats) {
- dbgs() << *(E.first) << "\n";
- for (RematNode *U : E.second->Operands)
- dbgs() << " " << *U->Node << "\n";
- }
- }
-#endif
-};
-} // end anonymous namespace
-
-namespace llvm {
-
-template <> struct GraphTraits<RematGraph *> {
- using NodeRef = RematGraph::RematNode *;
- using ChildIteratorType = RematGraph::RematNode **;
-
- static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
- static ChildIteratorType child_begin(NodeRef N) {
- return N->Operands.begin();
- }
- static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
-};
-
-} // end namespace llvm
-
-#undef DEBUG_TYPE // "coro-suspend-crossing"
#define DEBUG_TYPE "coro-frame"
namespace {
@@ -268,15 +145,6 @@ static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) {
I->dump();
}
}
-static void dumpRemats(
- StringRef Title,
- const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
- dbgs() << "------------- " << Title << "--------------\n";
- for (const auto &E : RM) {
- E.second->dump();
- dbgs() << "--\n";
- }
-}
static void dumpAllocas(const SmallVectorImpl<coro::AllocaInfo> &Allocas) {
dbgs() << "------------- Allocas --------------\n";
@@ -1284,7 +1152,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// If we have a single edge PHINode, remove it and replace it with a
// reload from the coroutine frame. (We already took care of multi edge
- // PHINodes by rewriting them in the rewritePHIs function).
+ // PHINodes by normalizing them in the rewritePHIs function).
if (auto *PN = dyn_cast<PHINode>(U)) {
assert(PN->getNumIncomingValues() == 1 &&
"unexpected number of incoming "
@@ -1634,93 +1502,6 @@ static void rewritePHIs(Function &F) {
rewritePHIs(*BB);
}
-/// Default materializable callback
-// Check for instructions that we can recreate on resume as opposed to spill
-// the result into a coroutine frame.
-bool coro::defaultMaterializable(Instruction &V) {
- return (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
- isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V));
-}
-
-// For each instruction identified as materializable across the suspend point,
-// and its associated DAG of other rematerializable instructions,
-// recreate the DAG of instructions after the suspend point.
-static void rewriteMaterializableInstructions(
- const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8>
- &AllRemats) {
- // This has to be done in 2 phases
- // Do the remats and record the required defs to be replaced in the
- // original use instructions
- // Once all the remats are complete, replace the uses in the final
- // instructions with the new defs
- typedef struct {
- Instruction *Use;
- Instruction *Def;
- Instruction *Remat;
- } ProcessNode;
-
- SmallVector<ProcessNode> FinalInstructionsToProcess;
-
- for (const auto &E : AllRemats) {
- Instruction *Use = E.first;
- Instruction *CurrentMaterialization = nullptr;
- RematGraph *RG = E.second.get();
- ReversePostOrderTraversal<RematGraph *> RPOT(RG);
- SmallVector<Instruction *> InstructionsToProcess;
-
- // If the target use is actually a suspend instruction then we have to
- // insert the remats into the end of the predecessor (there should only be
- // one). This is so that suspend blocks always have the suspend instruction
- // as the first instruction.
- auto InsertPoint = &*Use->getParent()->getFirstInsertionPt();
- if (isa<AnyCoroSuspendInst>(Use)) {
- BasicBlock *SuspendPredecessorBlock =
- Use->getParent()->getSinglePredecessor();
- assert(SuspendPredecessorBlock && "malformed coro suspend instruction");
- InsertPoint = SuspendPredecessorBlock->getTerminator();
- }
-
- // Note: skip the first instruction as this is the actual use that we're
- // rematerializing everything for.
- auto I = RPOT.begin();
- ++I;
- for (; I != RPOT.end(); ++I) {
- Instruction *D = (*I)->Node;
- CurrentMaterialization = D->clone();
- CurrentMaterialization->setName(D->getName());
- CurrentMaterialization->insertBefore(InsertPoint);
- InsertPoint = CurrentMaterialization;
-
- // Replace all uses of Def in the instructions being added as part of this
- // rematerialization group
- for (auto &I : InstructionsToProcess)
- I->replaceUsesOfWith(D, CurrentMaterialization);
-
- // Don't replace the final use at this point as this can cause problems
- // for other materializations. Instead, for any final use that uses a
- // define that's being rematerialized, record the replace values
- for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i)
- if (Use->getOperand(i) == D) // Is this operand pointing to oldval?
- FinalInstructionsToProcess.push_back(
- {Use, D, CurrentMaterialization});
-
- InstructionsToProcess.push_back(CurrentMaterialization);
- }
- }
-
- // Finally, replace the uses with the defines that we've just rematerialized
- for (auto &R : FinalInstructionsToProcess) {
- if (auto *PN = dyn_cast<PHINode>(R.Use)) {
- assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
- "values in the PHINode");
- PN->replaceAllUsesWith(R.Remat);
- PN->eraseFromParent();
- continue;
- }
- R.Use->replaceUsesOfWith(R.Def, R.Remat);
- }
-}
-
// Splits the block at a particular instruction unless it is the first
// instruction in the block with a single predecessor.
static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
@@ -1741,10 +1522,6 @@ static void splitAround(Instruction *I, const Twine &Name) {
splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
}
-static bool isSuspendBlock(BasicBlock *BB) {
- return isa<AnyCoroSuspendInst>(BB->front());
-}
-
/// After we split the coroutine, will the given basic block be along
/// an obvious exit path for the resumption function?
static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
@@ -1754,7 +1531,7 @@ static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
if (depth == 0) return false;
// If this is a suspend block, we're about to exit the resumption function.
- if (isSuspendBlock(BB))
+ if (coro::isSuspendBlock(BB))
return true;
// Recurse into the successors.
@@ -1995,7 +1772,8 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
DomSet.insert(&F.getEntryBlock());
for (auto *CSI : Shape.CoroSuspends) {
BasicBlock *SuspendBlock = CSI->getParent();
- assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
+ assert(coro::isSuspendBlock(SuspendBlock) &&
+ SuspendBlock->getSingleSuccessor() &&
"should have split coro.suspend into its own block");
DomSet.insert(SuspendBlock->getSingleSuccessor());
}
@@ -2227,68 +2005,6 @@ void coro::salvageDebugInfo(
}
}
-static void doRematerializations(
- Function &F, SuspendCrossingInfo &Checker,
- const std::function<bool(Instruction &)> &MaterializableCallback) {
- if (F.hasOptNone())
- return;
-
- coro::SpillInfo Spills;
-
- // See if there are materializable instructions across suspend points
- // We record these as the starting point to also identify materializable
- // defs of uses in these operations
- for (Instruction &I : instructions(F)) {
- if (!MaterializableCallback(I))
- continue;
- for (User *U : I.users())
- if (Checker.isDefinitionAcrossSuspend(I, U))
- Spills[&I].push_back(cast<Instruction>(U));
- }
-
- // Process each of the identified rematerializable instructions
- // and add predecessor instructions that can also be rematerialized.
- // This is actually a graph of instructions since we could potentially
- // have multiple uses of a def in the set of predecessor instructions.
- // The approach here is to maintain a graph of instructions for each bottom
- // level instruction - where we have a unique set of instructions (nodes)
- // and edges between them. We then walk the graph in reverse post-dominator
- // order to insert them past the suspend point, but ensure that ordering is
- // correct. We also rely on CSE removing duplicate defs for remats of
- // different instructions with a def in common (rather than maintaining more
- // complex graphs for each suspend point)
-
- // We can do this by adding new nodes to the list for each suspend
- // point. Then using standard GraphTraits to give a reverse post-order
- // traversal when we insert the nodes after the suspend
- SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats;
- for (auto &E : Spills) {
- for (Instruction *U : E.second) {
- // Don't process a user twice (this can happen if the instruction uses
- // more than one rematerializable def)
- if (AllRemats.count(U))
- continue;
-
- // Constructor creates the whole RematGraph for the given Use
- auto RematUPtr =
- std::make_unique<RematGraph>(MaterializableCallback, U, Checker);
-
- LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
- ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
- for (auto I = RPOT.begin(); I != RPOT.end();
- ++I) { (*I)->Node->dump(); } dbgs()
- << "\n";);
-
- AllRemats[U] = std::move(RematUPtr);
- }
- }
-
- // Rewrite materializable instructions to be materialized at the use
- // point.
- LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
- rewriteMaterializableInstructions(AllRemats);
-}
-
void coro::normalizeCoroutine(Function &F, coro::Shape &Shape,
TargetTransformInfo &TTI) {
// Don't eliminate swifterror in async functions that won't be split.
@@ -2324,8 +2040,8 @@ void coro::normalizeCoroutine(Function &F, coro::Shape &Shape,
IRBuilder<> Builder(AsyncEnd);
SmallVector<Value *, 8> Args(AsyncEnd->args());
auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
- auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
- TTI, Arguments, Builder);
+ auto *Call = coro::createMustTailCall(
+ AsyncEnd->getDebugLoc(), MustTailCallFn, TTI, Arguments, Builder);
splitAround(Call, "MustTailCall.Before.CoroEnd");
}
}
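
Context for the large deletion above: the rematerialization machinery moves to MaterializationUtils.cpp, but the idea is unchanged. Cheap instructions (casts, GEPs, binary operators, compares, and selects per defaultMaterializable) are cloned again after a suspend point rather than having their results spilled into the coroutine frame. A conceptual plain C++ sketch of that trade-off (a hand-written stand-in, not the coroutine lowering):

#include <cassert>

// Model of a coroutine frame: only 'Base' is spilled; the derived value is
// recomputed ("rematerialized") after the suspend instead of widening the
// frame with another slot.
struct FrameSketch { int Base; };

int useBeforeSuspend(int Base) { return (Base + 16) * 2; }

int resumeAfterSuspend(const FrameSketch &F) {
  int Offset = F.Base + 16; // cheap to recreate, so not stored in the frame
  return Offset * 2;
}

int main() {
  FrameSketch F{4};
  assert(useBeforeSuspend(F.Base) == resumeAfterSuspend(F));
  return 0;
}
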
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 698c21a..891798f 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -21,6 +21,7 @@ class CallGraph;
namespace coro {
+bool isSuspendBlock(BasicBlock *BB);
bool declaresAnyIntrinsic(const Module &M);
bool declaresIntrinsics(const Module &M,
const std::initializer_list<StringRef>);
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index dc3829d..8ea460b 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -2080,12 +2080,13 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
return Shape;
}
-static void updateCallGraphAfterCoroutineSplit(
+static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit(
LazyCallGraph::Node &N, const coro::Shape &Shape,
const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
FunctionAnalysisManager &FAM) {
+ auto *CurrentSCC = &C;
if (!Clones.empty()) {
switch (Shape.ABI) {
case coro::ABI::Switch:
@@ -2105,13 +2106,16 @@ static void updateCallGraphAfterCoroutineSplit(
}
// Let the CGSCC infra handle the changes to the original function.
- updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
+ CurrentSCC = &updateCGAndAnalysisManagerForCGSCCPass(CG, *CurrentSCC, N, AM,
+ UR, FAM);
}
// Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
// to the split functions.
postSplitCleanup(N.getFunction());
- updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
+ CurrentSCC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentSCC, N,
+ AM, UR, FAM);
+ return *CurrentSCC;
}
/// Replace a call to llvm.coro.prepare.retcon.
@@ -2200,6 +2204,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
if (Coroutines.empty() && PrepareFns.empty())
return PreservedAnalyses::all();
+ auto *CurrentSCC = &C;
// Split all the coroutines.
for (LazyCallGraph::Node *N : Coroutines) {
Function &F = N->getFunction();
@@ -2211,7 +2216,8 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
coro::Shape Shape =
splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
OptimizeFrame, MaterializableCallback);
- updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
+ CurrentSCC = &updateCallGraphAfterCoroutineSplit(
+ *N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
ORE.emit([&]() {
@@ -2223,14 +2229,14 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
if (!Shape.CoroSuspends.empty()) {
// Run the CGSCC pipeline on the original and newly split functions.
- UR.CWorklist.insert(&C);
+ UR.CWorklist.insert(CurrentSCC);
for (Function *Clone : Clones)
UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
}
}
for (auto *PrepareFn : PrepareFns) {
- replaceAllPrepares(PrepareFn, CG, C);
+ replaceAllPrepares(PrepareFn, CG, *CurrentSCC);
}
return PreservedAnalyses::none();
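
The CoroSplit changes above make the update helpers return the SCC the node currently belongs to, and the caller rebinds CurrentSCC instead of reusing the original &C, since CGSCC updates can move the function into a different SCC. A small sketch of that "return the current container and rebind" pattern (hypothetical types, not LazyCallGraph):

#include <cassert>
#include <string>

struct SCCSketch { std::string Name; };

// After an update the node may live in a different SCC; return the one that
// is now current so the caller can keep chasing the fresh handle.
SCCSketch &updateSketch(SCCSketch &Current, SCCSketch &Replacement,
                        bool NodeMoved) {
  return NodeMoved ? Replacement : Current;
}

int main() {
  SCCSketch Original{"original"}, Split{"split"};
  SCCSketch *CurrentSCC = &Original;          // mirrors 'auto *CurrentSCC = &C;'
  CurrentSCC = &updateSketch(*CurrentSCC, Split, /*NodeMoved=*/true);
  assert(CurrentSCC == &Split);               // later uses see the new SCC
  return 0;
}
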
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index be25733..cdc442b 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -100,6 +100,10 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
}
#endif
+bool coro::isSuspendBlock(BasicBlock *BB) {
+ return isa<AnyCoroSuspendInst>(BB->front());
+}
+
bool coro::declaresAnyIntrinsic(const Module &M) {
for (StringRef Name : CoroIntrinsics) {
assert(isCoroutineIntrinsicName(Name) && "not a coroutine intrinsic");
diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp
new file mode 100644
index 0000000..708e873
--- /dev/null
+++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp
@@ -0,0 +1,308 @@
+//===- MaterializationUtils.cpp - Coroutine rematerialization utilities --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file contains classes used to materialize insts after suspend points.
+//===----------------------------------------------------------------------===//
+
+#include "MaterializationUtils.h"
+#include "SpillUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include <deque>
+
+using namespace llvm;
+
+using namespace coro;
+
+// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
+// "coro-frame", which results in leaner debug spew.
+#define DEBUG_TYPE "coro-suspend-crossing"
+
+namespace {
+
+// RematGraph is used to construct a DAG for rematerializable instructions
+// When the constructor is invoked with a candidate instruction (which is
+// materializable) it builds a DAG of materializable instructions from that
+// point.
+// Typically, for each instruction identified as re-materializable across a
+// suspend point, a RematGraph will be created.
+struct RematGraph {
+ // Each RematNode in the graph contains the edges to instructions providing
+ // operands in the current node.
+ struct RematNode {
+ Instruction *Node;
+ SmallVector<RematNode *> Operands;
+ RematNode() = default;
+ RematNode(Instruction *V) : Node(V) {}
+ };
+
+ RematNode *EntryNode;
+ using RematNodeMap =
+ SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
+ RematNodeMap Remats;
+ const std::function<bool(Instruction &)> &MaterializableCallback;
+ SuspendCrossingInfo &Checker;
+
+ RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
+ Instruction *I, SuspendCrossingInfo &Checker)
+ : MaterializableCallback(MaterializableCallback), Checker(Checker) {
+ std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I);
+ EntryNode = FirstNode.get();
+ std::deque<std::unique_ptr<RematNode>> WorkList;
+ addNode(std::move(FirstNode), WorkList, cast<User>(I));
+ while (WorkList.size()) {
+ std::unique_ptr<RematNode> N = std::move(WorkList.front());
+ WorkList.pop_front();
+ addNode(std::move(N), WorkList, cast<User>(I));
+ }
+ }
+
+ void addNode(std::unique_ptr<RematNode> NUPtr,
+ std::deque<std::unique_ptr<RematNode>> &WorkList,
+ User *FirstUse) {
+ RematNode *N = NUPtr.get();
+ if (Remats.count(N->Node))
+ return;
+
+ // We haven't seen this node yet - add it to the list
+ Remats[N->Node] = std::move(NUPtr);
+ for (auto &Def : N->Node->operands()) {
+ Instruction *D = dyn_cast<Instruction>(Def.get());
+ if (!D || !MaterializableCallback(*D) ||
+ !Checker.isDefinitionAcrossSuspend(*D, FirstUse))
+ continue;
+
+ if (Remats.count(D)) {
+ // Already have this in the graph
+ N->Operands.push_back(Remats[D].get());
+ continue;
+ }
+
+ bool NoMatch = true;
+ for (auto &I : WorkList) {
+ if (I->Node == D) {
+ NoMatch = false;
+ N->Operands.push_back(I.get());
+ break;
+ }
+ }
+ if (NoMatch) {
+ // Create a new node
+ std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D);
+ N->Operands.push_back(ChildNode.get());
+ WorkList.push_back(std::move(ChildNode));
+ }
+ }
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ static std::string getBasicBlockLabel(const BasicBlock *BB) {
+ if (BB->hasName())
+ return BB->getName().str();
+
+ std::string S;
+ raw_string_ostream OS(S);
+ BB->printAsOperand(OS, false);
+ return OS.str().substr(1);
+ }
+
+ void dump() const {
+ dbgs() << "Entry (";
+ dbgs() << getBasicBlockLabel(EntryNode->Node->getParent());
+ dbgs() << ") : " << *EntryNode->Node << "\n";
+ for (auto &E : Remats) {
+ dbgs() << *(E.first) << "\n";
+ for (RematNode *U : E.second->Operands)
+ dbgs() << " " << *U->Node << "\n";
+ }
+ }
+#endif
+};
+
+} // namespace
+
+namespace llvm {
+template <> struct GraphTraits<RematGraph *> {
+ using NodeRef = RematGraph::RematNode *;
+ using ChildIteratorType = RematGraph::RematNode **;
+
+ static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return N->Operands.begin();
+ }
+ static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
+};
+
+} // end namespace llvm
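The worklist construction above is easier to see in isolation from the LLVM types. The following standalone sketch uses made-up stand-ins (Inst, Node and buildRematDAG are not LLVM names), and IsRemat stands in for both the materializability callback and the isDefinitionAcrossSuspend check that the real addNode applies to every operand; it reproduces the same de-duplication against nodes already in the graph and against entries still pending on the worklist.

#include <deque>
#include <map>
#include <memory>
#include <vector>

// Simplified stand-ins for Instruction and RematNode; illustration only.
struct Inst {
  std::vector<Inst *> Operands;
};

struct Node {
  Inst *I;
  std::vector<Node *> Edges; // operands that are themselves rematerializable
};

// Worklist construction of the operand DAG rooted at Root, mirroring the
// shape of RematGraph's constructor and addNode.
template <typename Pred>
std::map<Inst *, std::unique_ptr<Node>> buildRematDAG(Inst *Root, Pred IsRemat) {
  std::map<Inst *, std::unique_ptr<Node>> Nodes;
  std::deque<std::unique_ptr<Node>> WorkList;
  WorkList.push_back(std::make_unique<Node>(Node{Root, {}}));
  while (!WorkList.empty()) {
    std::unique_ptr<Node> Cur = std::move(WorkList.front());
    WorkList.pop_front();
    if (Nodes.count(Cur->I))
      continue; // reached twice through different users; keep the first copy
    Node *N = Cur.get();
    Nodes[N->I] = std::move(Cur);
    for (Inst *Op : N->I->Operands) {
      if (!IsRemat(Op))
        continue;
      if (auto It = Nodes.find(Op); It != Nodes.end()) {
        N->Edges.push_back(It->second.get()); // already in the graph
        continue;
      }
      bool Pending = false; // already queued but not yet added?
      for (auto &W : WorkList)
        if (W->I == Op) {
          N->Edges.push_back(W.get());
          Pending = true;
          break;
        }
      if (!Pending) {
        auto Child = std::make_unique<Node>(Node{Op, {}});
        N->Edges.push_back(Child.get());
        WorkList.push_back(std::move(Child));
      }
    }
  }
  return Nodes;
}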
+
+// For each instruction identified as materializable across the suspend point,
+// and its associated DAG of other rematerializable instructions,
+// recreate the DAG of instructions after the suspend point.
+static void rewriteMaterializableInstructions(
+ const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8>
+ &AllRemats) {
+ // This has to be done in two phases:
+ // 1. Do the remats and record the required defs to be replaced in the
+ // original use instructions.
+ // 2. Once all the remats are complete, replace the uses in the final
+ // instructions with the new defs.
+ struct ProcessNode {
+ Instruction *Use;
+ Instruction *Def;
+ Instruction *Remat;
+ };
+
+ SmallVector<ProcessNode> FinalInstructionsToProcess;
+
+ for (const auto &E : AllRemats) {
+ Instruction *Use = E.first;
+ Instruction *CurrentMaterialization = nullptr;
+ RematGraph *RG = E.second.get();
+ ReversePostOrderTraversal<RematGraph *> RPOT(RG);
+ SmallVector<Instruction *> InstructionsToProcess;
+
+ // If the target use is actually a suspend instruction then we have to
+ // insert the remats at the end of the predecessor (there should only be
+ // one). This is so that suspend blocks always have the suspend instruction
+ // as the first instruction.
+ auto InsertPoint = &*Use->getParent()->getFirstInsertionPt();
+ if (isa<AnyCoroSuspendInst>(Use)) {
+ BasicBlock *SuspendPredecessorBlock =
+ Use->getParent()->getSinglePredecessor();
+ assert(SuspendPredecessorBlock && "malformed coro suspend instruction");
+ InsertPoint = SuspendPredecessorBlock->getTerminator();
+ }
+
+ // Note: skip the first instruction as this is the actual use that we're
+ // rematerializing everything for.
+ auto I = RPOT.begin();
+ ++I;
+ for (; I != RPOT.end(); ++I) {
+ Instruction *D = (*I)->Node;
+ CurrentMaterialization = D->clone();
+ CurrentMaterialization->setName(D->getName());
+ CurrentMaterialization->insertBefore(InsertPoint);
+ InsertPoint = CurrentMaterialization;
+
+ // Replace all uses of Def in the instructions being added as part of this
+ // rematerialization group
+ for (auto &I : InstructionsToProcess)
+ I->replaceUsesOfWith(D, CurrentMaterialization);
+
+ // Don't replace the final use at this point as this can cause problems
+ // for other materializations. Instead, for any final use that uses a
+ // def that's being rematerialized, record the replacement values.
+ for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i)
+ if (Use->getOperand(i) == D) // Is this operand pointing to oldval?
+ FinalInstructionsToProcess.push_back(
+ {Use, D, CurrentMaterialization});
+
+ InstructionsToProcess.push_back(CurrentMaterialization);
+ }
+ }
+
+ // Finally, replace the uses with the defs that we've just rematerialized
+ for (auto &R : FinalInstructionsToProcess) {
+ if (auto *PN = dyn_cast<PHINode>(R.Use)) {
+ assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
+ "values in the PHINode");
+ PN->replaceAllUsesWith(R.Remat);
+ PN->eraseFromParent();
+ continue;
+ }
+ R.Use->replaceUsesOfWith(R.Def, R.Remat);
+ }
+}
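One detail worth spelling out: the loop above visits the DAG in reverse post-order, which lists uses before their operands, yet the clones end up in the block with operands ahead of their users because each clone is inserted before the previous one and then becomes the new insert point. A small standalone toy (ToyNode and the names below are invented, nothing here is LLVM API) reproduces that ordering:

#include <cstddef>
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Toy DAG node standing in for RematNode; edges go from a use to its operands.
struct ToyNode {
  std::string Name;
  std::vector<ToyNode *> Ops;
};

static void postOrder(ToyNode *N, std::set<ToyNode *> &Seen,
                      std::vector<ToyNode *> &Out) {
  if (!Seen.insert(N).second)
    return;
  for (ToyNode *Op : N->Ops)
    postOrder(Op, Seen, Out);
  Out.push_back(N);
}

int main() {
  // use -> add -> {load, mul}, mul -> load : one small remat group.
  ToyNode Load{"load", {}};
  ToyNode Mul{"mul", {&Load}};
  ToyNode Add{"add", {&Load, &Mul}};
  ToyNode Use{"use", {&Add}};

  std::set<ToyNode *> Seen;
  std::vector<ToyNode *> PO;
  postOrder(&Use, Seen, PO);
  std::vector<ToyNode *> RPO(PO.rbegin(), PO.rend()); // use, add, mul, load

  // Clone in RPO order, skipping the use itself, and always insert *before*
  // the previously inserted clone, as the loop above does with InsertPoint.
  std::vector<std::string> Block;
  for (std::size_t I = 1; I < RPO.size(); ++I)
    Block.insert(Block.begin(), RPO[I]->Name + ".remat");

  for (const std::string &S : Block)
    std::cout << S << "\n"; // prints: load.remat, mul.remat, add.remat
}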
+
+/// Default materializable callback.
+// Check for instructions that we can recreate on resume, as opposed to
+// spilling the result into the coroutine frame.
+bool llvm::coro::defaultMaterializable(Instruction &V) {
+ return (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
+ isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V));
+}
+
+bool llvm::coro::isTriviallyMaterializable(Instruction &V) {
+ return defaultMaterializable(V);
+}
+
+#ifndef NDEBUG
+static void dumpRemats(
+ StringRef Title,
+ const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
+ dbgs() << "------------- " << Title << "--------------\n";
+ for (const auto &E : RM) {
+ E.second->dump();
+ dbgs() << "--\n";
+ }
+}
+#endif
+
+void coro::doRematerializations(
+ Function &F, SuspendCrossingInfo &Checker,
+ std::function<bool(Instruction &)> IsMaterializable) {
+ if (F.hasOptNone())
+ return;
+
+ coro::SpillInfo Spills;
+
+ // See if there are materializable instructions across suspend points.
+ // We record these as the starting points from which to also identify
+ // materializable defs of the uses in these operations.
+ for (Instruction &I : instructions(F)) {
+ if (!IsMaterializable(I))
+ continue;
+ for (User *U : I.users())
+ if (Checker.isDefinitionAcrossSuspend(I, U))
+ Spills[&I].push_back(cast<Instruction>(U));
+ }
+
+ // Process each of the identified rematerializable instructions
+ // and add predecessor instructions that can also be rematerialized.
+ // This is actually a graph of instructions since we could potentially
+ // have multiple uses of a def in the set of predecessor instructions.
+ // The approach here is to maintain a graph of instructions for each
+ // bottom-level instruction, where we have a unique set of instructions
+ // (nodes) and edges between them. We then walk the graph in reverse
+ // post-order to insert them past the suspend point, while ensuring that
+ // the ordering is correct. We also rely on CSE removing duplicate defs
+ // for remats of different instructions with a def in common (rather than
+ // maintaining more complex graphs for each suspend point).
+
+ // We can do this by adding new nodes to the list for each suspend point,
+ // then using standard GraphTraits to give a reverse post-order traversal
+ // when we insert the nodes after the suspend.
+ SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats;
+ for (auto &E : Spills) {
+ for (Instruction *U : E.second) {
+ // Don't process a user twice (this can happen if the instruction uses
+ // more than one rematerializable def)
+ if (AllRemats.count(U))
+ continue;
+
+ // Constructor creates the whole RematGraph for the given Use
+ auto RematUPtr =
+ std::make_unique<RematGraph>(IsMaterializable, U, Checker);
+
+ LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
+ ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
+ for (auto I = RPOT.begin(); I != RPOT.end();
+ ++I) { (*I)->Node->dump(); } dbgs()
+ << "\n";);
+
+ AllRemats[U] = std::move(RematUPtr);
+ }
+ }
+
+ // Rewrite materializable instructions to be materialized at the use
+ // point.
+ LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
+ rewriteMaterializableInstructions(AllRemats);
+}
diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.h b/llvm/lib/Transforms/Coroutines/MaterializationUtils.h
new file mode 100644
index 0000000..f391851
--- /dev/null
+++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.h
@@ -0,0 +1,30 @@
+//===- MaterializationUtils.h - Utilities for doing materialization -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H
+#define LIB_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H
+
+#include "SuspendCrossingInfo.h"
+#include "llvm/IR/Instruction.h"
+#include <functional>
+
+namespace llvm {
+
+namespace coro {
+
+// True if I is trivially rematerializable, e.g. a CastInst or BinaryOperator.
+bool isTriviallyMaterializable(Instruction &I);
+
+// Performs rematerialization, invoked from buildCoroutineFrame.
+void doRematerializations(Function &F, SuspendCrossingInfo &Checker,
+ std::function<bool(Instruction &)> IsMaterializable);
+
+} // namespace coro
+
+} // namespace llvm
+
+#endif // LIB_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H
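For orientation, this is roughly how a caller would be expected to use the interface above. The wrapper below is hypothetical (the real call site, buildCoroutineFrame, is not part of this hunk, and rematerializeAcrossSuspends is an invented name); the extra mayHaveSideEffects guard is only there to show that the callback is an arbitrary policy hook.

#include "MaterializationUtils.h"
#include "llvm/IR/Function.h"

using namespace llvm;
using namespace coro;

// Hypothetical call site for the rematerialization entry point.
static void rematerializeAcrossSuspends(Function &F,
                                        SuspendCrossingInfo &Checker) {
  auto IsMaterializable = [](Instruction &I) {
    // Start from the default policy added by this patch; a caller may tighten
    // or widen it, e.g. by additionally rejecting anything with side effects.
    return isTriviallyMaterializable(I) && !I.mayHaveSideEffects();
  };
  doRematerializations(F, Checker, IsMaterializable);
}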
diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
index d71b0a3..f213ac1 100644
--- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
+++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
@@ -23,10 +23,6 @@ namespace {
typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet;
-static bool isSuspendBlock(BasicBlock *BB) {
- return isa<AnyCoroSuspendInst>(BB->front());
-}
-
// Check for structural coroutine intrinsics that should not be spilled into
// the coroutine frame.
static bool isCoroutineStructureIntrinsic(Instruction &I) {
@@ -45,7 +41,7 @@ static bool isSuspendReachableFrom(BasicBlock *From,
return false;
// We assume that we'll already have split suspends into their own blocks.
- if (isSuspendBlock(From))
+ if (coro::isSuspendBlock(From))
return true;
// Recurse on the successors.
diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.h b/llvm/lib/Transforms/Coroutines/SpillUtils.h
index de0ff0b..8843b61 100644
--- a/llvm/lib/Transforms/Coroutines/SpillUtils.h
+++ b/llvm/lib/Transforms/Coroutines/SpillUtils.h
@@ -29,8 +29,6 @@ struct AllocaInfo {
MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
};
-bool isSuspendBlock(BasicBlock *BB);
-
void collectSpillsFromArgs(SpillInfo &Spills, Function &F,
const SuspendCrossingInfo &Checker);
void collectSpillsAndAllocasFromInsts(
diff --git a/llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp b/llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp
index 6b0dc12..84699e6 100644
--- a/llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp
+++ b/llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp
@@ -165,8 +165,13 @@ SuspendCrossingInfo::SuspendCrossingInfo(
// Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
// the code beyond coro.end is reachable during initial invocation of the
// coroutine.
- for (auto *CE : CoroEnds)
+ for (auto *CE : CoroEnds) {
+ // Verify CoroEnd was normalized
+ assert(CE->getParent()->getFirstInsertionPt() == CE->getIterator() &&
+ CE->getParent()->size() <= 2 && "CoroEnd must be in its own BB");
+
getBlockData(CE->getParent()).End = true;
+ }
// Mark all suspend blocks and indicate that they kill everything they
// consume. Note, that crossing coro.save also requires a spill, as any code
@@ -179,6 +184,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(
B.Kills |= B.Consumes;
};
for (auto *CSI : CoroSuspends) {
+ // Verify CoroSuspend was normalized
+ assert(CSI->getParent()->getFirstInsertionPt() == CSI->getIterator() &&
+ CSI->getParent()->size() <= 2 &&
+ "CoroSuspend must be in its own BB");
+
markSuspendBlock(CSI);
if (auto *Save = CSI->getCoroSave())
markSuspendBlock(Save);
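Both new assertions check the same normalization invariant: after splitting, the coro.suspend or coro.end sits at its block's first insertion point and shares the block with at most a terminator. If it helped readability, the two conditions could be folded into a small predicate such as the hypothetical helper below (not part of the patch):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// Hypothetical convenience predicate; the patch writes these two conditions
// inline in the asserts instead.
static bool isInOwnNormalizedBlock(llvm::Instruction *I) {
  llvm::BasicBlock *BB = I->getParent();
  // The instruction sits at the block's first insertion point...
  bool AtFront = BB->getFirstInsertionPt() == I->getIterator();
  // ...and nothing else lives in the block besides I and the terminator.
  return AtFront && BB->size() <= 2;
}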
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b73f526..217c7cc 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12562,7 +12562,7 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
AAAddressSpaceImpl(const IRPosition &IRP, Attributor &A)
: AAAddressSpace(IRP, A) {}
- int32_t getAddressSpace() const override {
+ uint32_t getAddressSpace() const override {
assert(isValidState() && "the AA is invalid");
return AssumedAddressSpace;
}
@@ -12576,7 +12576,7 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
}
ChangeStatus updateImpl(Attributor &A) override {
- int32_t OldAddressSpace = AssumedAddressSpace;
+ uint32_t OldAddressSpace = AssumedAddressSpace;
auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
DepClassTy::REQUIRED);
auto Pred = [&](Value &Obj) {
@@ -12597,16 +12597,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
Value *AssociatedValue = &getAssociatedValue();
Value *OriginalValue = peelAddrspacecast(AssociatedValue);
if (getAddressSpace() == NoAddressSpace ||
- static_cast<uint32_t>(getAddressSpace()) ==
- getAssociatedType()->getPointerAddressSpace())
+ getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
return ChangeStatus::UNCHANGED;
PointerType *NewPtrTy =
- PointerType::get(getAssociatedType()->getContext(),
- static_cast<uint32_t>(getAddressSpace()));
+ PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
bool UseOriginalValue =
- OriginalValue->getType()->getPointerAddressSpace() ==
- static_cast<uint32_t>(getAddressSpace());
+ OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace();
bool Changed = false;
@@ -12656,9 +12653,9 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
}
private:
- int32_t AssumedAddressSpace = NoAddressSpace;
+ uint32_t AssumedAddressSpace = NoAddressSpace;
- bool takeAddressSpace(int32_t AS) {
+ bool takeAddressSpace(uint32_t AS) {
if (AssumedAddressSpace == NoAddressSpace) {
AssumedAddressSpace = AS;
return true;
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index ff0d7817..261731f 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -722,7 +722,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
return;
}
const auto &CtxMap = *Ctx;
- DenseSet<GlobalValue::GUID> ContainedGUIDs;
+ SetVector<GlobalValue::GUID> ContainedGUIDs;
for (const auto &[RootGuid, Root] : CtxMap) {
// Avoid ContainedGUIDs to get in/out of scope. Reuse its memory for
// subsequent roots, but clear its contents.
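The only functional change in this hunk is swapping DenseSet for SetVector for the collected GUIDs, presumably to get a deterministic, insertion-ordered walk while keeping set semantics and the clear-and-reuse pattern the adjacent comment describes; that motivation is my reading, the hunk itself does not state it. A minimal standalone illustration of the reuse pattern (collectPerRoot and the plain uint64_t GUIDs are made up for the example):

#include "llvm/ADT/SetVector.h"
#include <cstdint>
#include <vector>

// Standalone illustration of the container change, not FunctionImport code.
static void collectPerRoot(const std::vector<std::vector<uint64_t>> &Roots) {
  llvm::SetVector<uint64_t> ContainedGUIDs; // dedup + stable insertion order
  for (const auto &Root : Roots) {
    // Reuse the allocation across roots, clearing only the contents.
    ContainedGUIDs.clear();
    for (uint64_t G : Root)
      ContainedGUIDs.insert(G); // no-op if already present
    // ... the import logic would now walk ContainedGUIDs in insertion order.
  }
}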
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index d816656..1d67773 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -378,8 +378,8 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// after MemInsertPoint.
MemoryUseOrDef *MemInsertPoint = nullptr;
for (++BI; !BI->isTerminator(); ++BI) {
- auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
+ auto *CurrentAcc =
+ cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
if (CurrentAcc)
MemInsertPoint = CurrentAcc;
@@ -605,13 +605,13 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
// TODO: Simplify this once P will be determined by MSSA, in which case the
// discrepancy can no longer occur.
MemoryUseOrDef *MemInsertPoint = nullptr;
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
+ if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
} else {
const Instruction *ConstP = P;
for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
++LI->getReverseIterator())) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
MemInsertPoint = MA;
break;
}
@@ -623,7 +623,7 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
assert(MemInsertPoint && "Must have found insert point");
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
+ if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
MSSAU->moveAfter(MA, MemInsertPoint);
MemInsertPoint = MA;
}
@@ -661,18 +661,11 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
}
}
- // We found an instruction that may write to the loaded memory.
- // We can try to promote at this position instead of the store
+ // If we found an instruction that may write to the loaded memory,
+ // we can try to promote at this position instead of the store
// position if nothing aliases the store memory after this and the store
// destination is not in the range.
- if (P && P != SI) {
- if (!moveUp(SI, P, LI))
- P = nullptr;
- }
-
- // If a valid insertion position is found, then we can promote
- // the load/store pair to a memcpy.
- if (P) {
+ if (P == SI || moveUp(SI, P, LI)) {
// If we load from memory that may alias the memory we store to,
// memmove must be used to preserve semantic. If not, memcpy can
// be used. Also, if we load from constant memory, memcpy can be used
@@ -697,8 +690,7 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
<< "\n");
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
@@ -1258,8 +1250,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
CopySourceAlign, M->getLength(), M->isVolatile());
NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
- auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
+ auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
@@ -1369,12 +1361,11 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
MemSet->getOperand(1), MemsetLen, Alignment);
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
+ assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
"MemCpy must be a MemoryDef");
// The new memset is inserted before the memcpy, and it is known that the
// memcpy's defining access is the memset about to be removed.
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
auto *NewAccess =
MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
@@ -1479,8 +1470,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
Instruction *NewM =
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
CopySize, MemCpy->getDestAlign());
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
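The processStoreOfLoad change folds a null-out-and-retest pattern into a single condition. Assuming P is non-null when control reaches this point (in the surrounding function it starts out equal to SI), the two shapes below agree on when the promotion path is taken, and both attempt the move only when P is a hoist point distinct from SI. This is a standalone comparison with stand-in names (FakeInst, oldShape, newShape), not the real pass code:

#include <cassert>

struct FakeInst {};

// MoveOK stands in for moveUp(SI, P, LI) succeeding; the real call has side
// effects, but under the non-null assumption both shapes reach it in exactly
// the same situations.
static bool oldShape(FakeInst *SI, FakeInst *P, bool MoveOK) {
  if (P && P != SI)
    if (!MoveOK)
      P = nullptr;
  return P != nullptr; // "a valid insertion position was found"
}

static bool newShape(FakeInst *SI, FakeInst *P, bool MoveOK) {
  return P == SI || MoveOK; // the single condition used by the patch
}

int main() {
  FakeInst A, B;
  for (bool MoveOK : {false, true}) {
    assert(oldShape(&A, &A, MoveOK) == newShape(&A, &A, MoveOK)); // P == SI
    assert(oldShape(&A, &B, MoveOK) == newShape(&A, &B, MoveOK)); // P != SI
  }
  return 0;
}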
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 7aeee1d..fa6e671 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeConstantHoistingLegacyPassPass(Registry);
initializeDCELegacyPassPass(Registry);
+ initializeScalarizerLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 2bed348..01d2433 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -340,8 +341,33 @@ private:
const unsigned ScalarizeMinBits;
};
+class ScalarizerLegacyPass : public FunctionPass {
+public:
+ static char ID;
+ ScalarizerPassOptions Options;
+ ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
+ ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
} // end anonymous namespace
+ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
+ : FunctionPass(ID), Options(Options) {}
+
+void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+char ScalarizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
+ "Scalarize vector operations", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
+ "Scalarize vector operations", false, false)
+
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
const VectorSplit &VS, ValueVector *cachePtr)
: BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
@@ -414,6 +440,19 @@ Value *Scatterer::operator[](unsigned Frag) {
return CV[Frag];
}
+bool ScalarizerLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ScalarizerVisitor Impl(DT, Options);
+ return Impl.visit(F);
+}
+
+FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
+ return new ScalarizerLegacyPass(Options);
+}
+
bool ScalarizerVisitor::visit(Function &F) {
assert(Gathered.empty() && Scattered.empty());
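This hunk re-adds a legacy pass manager wrapper for the scalarizer, registers it, and exposes it through createScalarizerPass, presumably so pipelines that still run on the legacy pass manager (such as the DXIL lowering exercised by the DirectX tests below) can schedule it. A hypothetical driver using the factory defined here; it assumes the declaration lives next to ScalarizerPassOptions in llvm/Transforms/Scalar/Scalarizer.h, which this hunk does not show:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"

// Hypothetical driver: schedule the re-added legacy wrapper on a module.
static void runLegacyScalarizer(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createScalarizerPass(llvm::ScalarizerPassOptions()));
  PM.run(M);
}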
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index b5f87e4..322b431 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -69,6 +69,9 @@ public:
VPBuilder() = default;
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
+ VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
+ setInsertPoint(TheBB, IP);
+ }
/// Clear the insertion point: created instructions will not be inserted into
/// a block.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3b6b154..7b6cec9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -585,11 +585,6 @@ protected:
const SCEV2ValueTy &ExpandedSCEVs,
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
- /// Complete the loop skeleton by adding debug MDs, creating appropriate
- /// conditional branches in the middle block, preparing the builder and
- /// running the verifier. Return the preheader of the completed vector loop.
- BasicBlock *completeLoopSkeleton();
-
/// Allow subclasses to override and print debug traces before/after vplan
/// execution, when trace information is requested.
virtual void printDebugTracesAtStart(){};
@@ -8696,14 +8691,7 @@ addUsersInExitBlock(VPlan &Plan,
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
BasicBlock *ExitBB =
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
- // TODO: set B to MiddleVPBB->getFirstNonPhi(), taking care of affected tests.
- VPBuilder B(MiddleVPBB);
- if (auto *Terminator = MiddleVPBB->getTerminator()) {
- auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0));
- assert((!Condition || Condition->getParent() == MiddleVPBB) &&
- "Condition expected in MiddleVPBB");
- B.setInsertPoint(Condition ? Condition : Terminator);
- }
+ VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
// Introduce VPUsers modeling the exit values.
for (const auto &[ExitPhi, V] : ExitingValuesToFix) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 00d9f29..ac59ed3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12486,11 +12486,12 @@ public:
V = createShuffle(InVectors.front(), nullptr, CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
}
+ unsigned VF = std::max(CommonMask.size(), Mask.size());
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (CommonMask[Idx] == PoisonMaskElem && Mask[Idx] != PoisonMaskElem)
CommonMask[Idx] =
V->getType() != V1->getType()
- ? Idx + Sz
+ ? Idx + VF
: Mask[Idx] + cast<FixedVectorType>(V1->getType())
->getNumElements();
if (V->getType() != V1->getType())
diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
index a68c21f..58cb8c2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
@@ -7,8 +7,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define void @strict_fp_reductions() {
; CHECK-LABEL: 'strict_fp_reductions'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
@@ -18,8 +21,11 @@ define void @strict_fp_reductions() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; FP16-LABEL: 'strict_fp_reductions'
+; FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; FP16-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
@@ -29,8 +35,11 @@ define void @strict_fp_reductions() {
; FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; BF16-LABEL: 'strict_fp_reductions'
+; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; BF16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
@@ -39,8 +48,11 @@ define void @strict_fp_reductions() {
; BF16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
+ %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
%fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
%fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+ %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+ %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
%fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
%fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
%fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
@@ -54,12 +66,18 @@ define void @strict_fp_reductions() {
define void @fast_fp_reductions() {
; CHECK-LABEL: 'fast_fp_reductions'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
@@ -77,12 +95,18 @@ define void @fast_fp_reductions() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; FP16-LABEL: 'fast_fp_reductions'
+; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; FP16-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; FP16-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; FP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
@@ -100,12 +124,18 @@ define void @fast_fp_reductions() {
; FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; BF16-LABEL: 'fast_fp_reductions'
+; BF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; BF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; BF16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; BF16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
@@ -122,15 +152,24 @@ define void @fast_fp_reductions() {
; BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
+ %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+ %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+
%fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
%fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
%fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
%fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+ %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+ %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+
%fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0.0, <11 x half> undef)
%fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0.0, <13 x half> undef)
+ %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
+ %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
+
%fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
%fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
@@ -158,11 +197,14 @@ define void @fast_fp_reductions() {
declare bfloat @llvm.vector.reduce.fadd.v4f8(bfloat, <4 x bfloat>)
declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
+declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
+declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
declare half @llvm.vector.reduce.fadd.v11f16(half, <11 x half>)
declare half @llvm.vector.reduce.fadd.v13f16(half, <13 x half>)
+declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare float @llvm.vector.reduce.fadd.v13f32(float, <13 x float>)
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
new file mode 100644
index 0000000..2bd7a2e
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
@@ -0,0 +1,93 @@
+; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s
+
+; Make sure resource table is created correctly.
+; CHECK: Resources:
+target triple = "dxil-unknown-shadermodel6.0-compute"
+
+define void @main() #0 {
+
+ ; ByteAddressBuffer Buf : register(t8, space1)
+; CHECK: - Type: SRVRaw
+; CHECK: Space: 1
+; CHECK: LowerBound: 8
+; CHECK: UpperBound: 8
+; CHECK: Kind: RawBuffer
+; CHECK: Flags:
+; CHECK: UsedByAtomic64: false
+ %srv0 = call target("dx.RawBuffer", i8, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t(
+ i32 1, i32 8, i32 1, i32 0, i1 false)
+
+ ; struct S { float4 a; uint4 b; };
+ ; StructuredBuffer<S> Buf : register(t2, space4)
+; CHECK: - Type: SRVStructured
+; CHECK: Space: 4
+; CHECK: LowerBound: 2
+; CHECK: UpperBound: 2
+; CHECK: Kind: StructuredBuffer
+; CHECK: Flags:
+; CHECK: UsedByAtomic64: false
+ %srv1 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t(
+ i32 4, i32 2, i32 1, i32 0, i1 false)
+
+ ; Buffer<uint4> Buf[24] : register(t3, space5)
+; CHECK: - Type: SRVTyped
+; CHECK: Space: 5
+; CHECK: LowerBound: 3
+; CHECK: UpperBound: 26
+; CHECK: Kind: TypedBuffer
+; CHECK: Flags:
+; CHECK: UsedByAtomic64: false
+ %srv2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0t(
+ i32 5, i32 3, i32 24, i32 0, i1 false)
+
+ ; RWBuffer<int> Buf : register(u7, space2)
+; CHECK: - Type: UAVTyped
+; CHECK: Space: 2
+; CHECK: LowerBound: 7
+; CHECK: UpperBound: 7
+; CHECK: Kind: TypedBuffer
+; CHECK: Flags:
+; CHECK: UsedByAtomic64: false
+ %uav0 = call target("dx.TypedBuffer", i32, 1, 0, 1)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_1_0t(
+ i32 2, i32 7, i32 1, i32 0, i1 false)
+
+ ; RWBuffer<float4> Buf : register(u5, space3)
+; CHECK: - Type: UAVTyped
+; CHECK: Space: 3
+; CHECK: LowerBound: 5
+; CHECK: UpperBound: 5
+; CHECK: Kind: TypedBuffer
+; CHECK: Flags:
+; CHECK: UsedByAtomic64: false
+ %uav1 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_1_0(
+ i32 3, i32 5, i32 1, i32 0, i1 false)
+
+ ; RWBuffer<float4> BufferArray[10] : register(u0, space4)
+; CHECK: - Type: UAVTyped
+; CHECK: Space: 4
+; CHECK: LowerBound: 0
+; CHECK: UpperBound: 9
+; CHECK: Kind: TypedBuffer
+; CHECK: Flags:
+; CHECK: UsedByAtomic64: false
+ ; RWBuffer<float4> Buf = BufferArray[0]
+ %uav2_1 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_1_0(
+ i32 4, i32 0, i32 10, i32 0, i1 false)
+ ; RWBuffer<float4> Buf = BufferArray[5]
+ %uav2_2 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_1_0(
+ i32 4, i32 0, i32 10, i32 5, i1 false)
+ ret void
+}
+
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
+
+!dx.valver = !{!0}
+
+!0 = !{i32 1, i32 7}
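The CHECK lines in this test all follow the same bound arithmetic: a binding that starts at register LowerBound with RangeSize slots ends at UpperBound = LowerBound + RangeSize - 1, so the 24-element t3/space5 array spans registers 3 through 26 and the 10-element u0/space4 array spans 0 through 9. A tiny standalone helper making that explicit (BindingRange and rangeFor are invented names, not a DirectX API):

#include <cassert>
#include <cstdint>

// Inclusive register range implied by (lower bound, range size).
struct BindingRange {
  uint32_t LowerBound;
  uint32_t UpperBound;
};

static BindingRange rangeFor(uint32_t LowerBound, uint32_t RangeSize) {
  return {LowerBound, LowerBound + RangeSize - 1};
}

int main() {
  assert(rangeFor(8, 1).UpperBound == 8);   // ByteAddressBuffer at t8
  assert(rangeFor(3, 24).UpperBound == 26); // Buffer<uint4> Buf[24] at t3
  assert(rangeFor(0, 10).UpperBound == 9);  // RWBuffer<float4> BufferArray[10] at u0
  return 0;
}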
diff --git a/llvm/test/CodeGen/DirectX/acos.ll b/llvm/test/CodeGen/DirectX/acos.ll
index cc32182..f4a10eb 100644
--- a/llvm/test/CodeGen/DirectX/acos.ll
+++ b/llvm/test/CodeGen/DirectX/acos.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for acos are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @acos_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}})
%elt.acos = call float @llvm.acos.f32(float %a)
ret float %elt.acos
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @acos_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}})
%elt.acos = call half @llvm.acos.f16(half %a)
ret half %elt.acos
}
+define noundef <4 x float> @acos_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.acos.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.acos.f16(half)
declare float @llvm.acos.f32(float)
+declare <4 x float> @llvm.acos.v4f32(<4 x float>)
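The new RUN lines in this and the following DirectX tests add -scalarizer ahead of -dxil-op-lower, so a vector intrinsic is split into per-lane work before lowering; that is why the float4 tests expect four extractelement/dx.op.unary/insertelement triples rather than a single vector call. The standalone sketch below mirrors that per-lane shape in plain C++ (scalarizedAcos is an invented name and std::acos merely stands in for the scalar dx.op call):

#include <array>
#include <cmath>

// Per-lane scalarization of a vector unary op: the form -scalarizer produces
// so that -dxil-op-lower only ever sees scalar calls.
static std::array<float, 4> scalarizedAcos(const std::array<float, 4> &V) {
  std::array<float, 4> R{};
  for (int I = 0; I < 4; ++I)
    R[I] = std::acos(V[I]); // extractelement -> scalar op -> insertelement
  return R;
}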
diff --git a/llvm/test/CodeGen/DirectX/asin.ll b/llvm/test/CodeGen/DirectX/asin.ll
index 06e3bab..bd948f59 100644
--- a/llvm/test/CodeGen/DirectX/asin.ll
+++ b/llvm/test/CodeGen/DirectX/asin.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for asin are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @asin_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}})
%elt.asin = call float @llvm.asin.f32(float %a)
ret float %elt.asin
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @asin_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}})
%elt.asin = call half @llvm.asin.f16(half %a)
ret half %elt.asin
}
+define noundef <4 x float> @asin_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.asin.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.asin.f16(half)
declare float @llvm.asin.f32(float)
+declare <4 x float> @llvm.asin.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/atan.ll b/llvm/test/CodeGen/DirectX/atan.ll
index d7c4cd0..58899ab 100644
--- a/llvm/test/CodeGen/DirectX/atan.ll
+++ b/llvm/test/CodeGen/DirectX/atan.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for atan are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @atan_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}})
%elt.atan = call float @llvm.atan.f32(float %a)
ret float %elt.atan
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @atan_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}})
%elt.atan = call half @llvm.atan.f16(half %a)
ret half %elt.atan
}
+define noundef <4 x float> @atan_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.atan.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.atan.f16(half)
declare float @llvm.atan.f32(float)
+declare <4 x float> @llvm.atan.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll
index 48bc549..bd6e747 100644
--- a/llvm/test/CodeGen/DirectX/ceil.ll
+++ b/llvm/test/CodeGen/DirectX/ceil.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for ceil are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.ceil
}
+define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.ceil.f16(half)
declare float @llvm.ceil.f32(float)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/cos.ll b/llvm/test/CodeGen/DirectX/cos.ll
index 72f4bfc..85f5db2 100644
--- a/llvm/test/CodeGen/DirectX/cos.ll
+++ b/llvm/test/CodeGen/DirectX/cos.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for cos are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.cos
}
+define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.cos.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.cos.f16(half)
declare float @llvm.cos.f32(float)
+declare <4 x float> @llvm.cos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/cosh.ll b/llvm/test/CodeGen/DirectX/cosh.ll
index 91aaf89..670a8a3 100644
--- a/llvm/test/CodeGen/DirectX/cosh.ll
+++ b/llvm/test/CodeGen/DirectX/cosh.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for cosh are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @cosh_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}})
%elt.cosh = call float @llvm.cosh.f32(float %a)
ret float %elt.cosh
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @cosh_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}})
%elt.cosh = call half @llvm.cosh.f16(half %a)
ret half %elt.cosh
}
+define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.cosh.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.cosh.f16(half)
declare float @llvm.cosh.f32(float)
+declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/exp2.ll b/llvm/test/CodeGen/DirectX/exp2.ll
index b70b87d..6d16af6 100644
--- a/llvm/test/CodeGen/DirectX/exp2.ll
+++ b/llvm/test/CodeGen/DirectX/exp2.ll
@@ -1,31 +1,39 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.7-library %s | FileCheck %s
; Make sure dxil operation function calls for exp2 are generated for float and half.
-; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}})
-; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}})
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.7-library"
-
-; Function Attrs: noinline nounwind optnone
-define noundef float @exp2_float(float noundef %a) #0 {
+define noundef float @exp2_float(float noundef %a) {
entry:
- %a.addr = alloca float, align 4
- store float %a, ptr %a.addr, align 4
- %0 = load float, ptr %a.addr, align 4
- %elt.exp2 = call float @llvm.exp2.f32(float %0)
+ ; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}})
+ %elt.exp2 = call float @llvm.exp2.f32(float %a)
ret float %elt.exp2
}
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.exp2.f32(float) #1
-
-; Function Attrs: noinline nounwind optnone
-define noundef half @exp2_half(half noundef %a) #0 {
+define noundef half @exp2_half(half noundef %a) {
entry:
- %a.addr = alloca half, align 2
- store half %a, ptr %a.addr, align 2
- %0 = load half, ptr %a.addr, align 2
- %elt.exp2 = call half @llvm.exp2.f16(half %0)
+ ; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}})
+ %elt.exp2 = call half @llvm.exp2.f16(half %a)
ret half %elt.exp2
}
+
+define noundef <4 x float> @exp2_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
+declare float @llvm.exp2.f32(float)
+declare half @llvm.exp2.f16(half)
+declare <4 x float> @llvm.exp2.v4f32(<4 x float> %a)
diff --git a/llvm/test/CodeGen/DirectX/fabs.ll b/llvm/test/CodeGen/DirectX/fabs.ll
index becbdf8..6d903f1 100644
--- a/llvm/test/CodeGen/DirectX/fabs.ll
+++ b/llvm/test/CodeGen/DirectX/fabs.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for abs are generated for float, half, and double.
@@ -27,6 +27,26 @@ entry:
ret double %elt.abs
}
+; CHECK-LABEL: fabs_float4
+define noundef <4 x float> @fabs_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/floor.ll b/llvm/test/CodeGen/DirectX/floor.ll
index f79f160..eaab398 100644
--- a/llvm/test/CodeGen/DirectX/floor.ll
+++ b/llvm/test/CodeGen/DirectX/floor.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -passes=dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for floor are generated for float and half.
-define noundef float @floor_float(float noundef %a) #0 {
+define noundef float @floor_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 27, float %{{.*}})
%elt.floor = call float @llvm.floor.f32(float %a)
ret float %elt.floor
}
-define noundef half @floor_half(half noundef %a) #0 {
+define noundef half @floor_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 27, half %{{.*}})
%elt.floor = call half @llvm.floor.f16(half %a)
ret half %elt.floor
}
+define noundef <4 x float> @floor_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.floor.f16(half)
declare float @llvm.floor.f32(float)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll
index 295776b..03a00c4 100644
--- a/llvm/test/CodeGen/DirectX/isinf.ll
+++ b/llvm/test/CodeGen/DirectX/isinf.ll
@@ -1,25 +1,21 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for isinf are generated for float and half.
-; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}})
-; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}})
-; Function Attrs: noinline nounwind optnone
-define noundef i1 @isinf_float(float noundef %a) #0 {
+define noundef i1 @isinf_float(float noundef %a) {
entry:
- %a.addr = alloca float, align 4
- store float %a, ptr %a.addr, align 4
- %0 = load float, ptr %a.addr, align 4
- %dx.isinf = call i1 @llvm.dx.isinf.f32(float %0)
+ ; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}})
+ %dx.isinf = call i1 @llvm.dx.isinf.f32(float %a)
ret i1 %dx.isinf
}
-; Function Attrs: noinline nounwind optnone
-define noundef i1 @isinf_half(half noundef %p0) #0 {
+define noundef i1 @isinf_half(half noundef %a) {
entry:
- %p0.addr = alloca half, align 2
- store half %p0, ptr %p0.addr, align 2
- %0 = load half, ptr %p0.addr, align 2
- %dx.isinf = call i1 @llvm.dx.isinf.f16(half %0)
+ ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}})
+ %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a)
ret i1 %dx.isinf
}
+
+
+declare i1 @llvm.dx.isinf.f16(half)
+declare i1 @llvm.dx.isinf.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
new file mode 100644
index 0000000..52bd891
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \
+; RUN: grep -v "Verify generated machine code" | FileCheck %s
+
+; REQUIRES: asserts
+
+; CHECK-LABEL: Pass Arguments:
+; CHECK-NEXT: Target Library Information
+; CHECK-NEXT: ModulePass Manager
+; CHECK-NEXT: DXIL Intrinsic Expansion
+; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: Dominator Tree Construction
+; CHECK-NEXT: Scalarize vector operations
+; CHECK-NEXT: DXIL Intrinsic Expansion
+; CHECK-NEXT: DXIL Resource analysis
+; CHECK-NEXT: DXIL Op Lowering
+; CHECK-NEXT: DXIL Finalize Linkage
+; CHECK-NEXT: DXIL resource Information
+; CHECK-NEXT: DXIL Shader Flag Analysis
+; CHECK-NEXT: DXIL Module Metadata analysis
+; CHECK-NEXT: DXIL Translate Metadata
+; CHECK-NEXT: DXIL Prepare Module
+; CHECK-NEXT: DXIL Metadata Pretty Printer
+; CHECK-NEXT: Print Module IR
+
diff --git a/llvm/test/CodeGen/DirectX/reversebits.ll b/llvm/test/CodeGen/DirectX/reversebits.ll
index 1ade57b..b5530d0 100644
--- a/llvm/test/CodeGen/DirectX/reversebits.ll
+++ b/llvm/test/CodeGen/DirectX/reversebits.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for reversebits are generated for all integer types.
@@ -26,6 +26,25 @@ entry:
ret i64 %elt.bitreverse
}
+define noundef <4 x i32> @reversebits_int324(<4 x i32> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee3]])
+ ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3
+ %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
+ ret <4 x i32> %2
+}
+
declare i16 @llvm.bitreverse.i16(i16)
declare i32 @llvm.bitreverse.i32(i32)
declare i64 @llvm.bitreverse.i64(i64)
+declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/DirectX/round.ll b/llvm/test/CodeGen/DirectX/round.ll
index db953fb..b08cbac 100644
--- a/llvm/test/CodeGen/DirectX/round.ll
+++ b/llvm/test/CodeGen/DirectX/round.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for round are generated for float and half.
@@ -18,5 +18,25 @@ entry:
ret float %elt.roundeven
}
+define noundef <4 x float> @round_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
+
declare half @llvm.roundeven.f16(half)
declare float @llvm.roundeven.f32(float)
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll
index a855735..404cab7 100644
--- a/llvm/test/CodeGen/DirectX/saturate.ll
+++ b/llvm/test/CodeGen/DirectX/saturate.ll
@@ -2,7 +2,7 @@
; Make sure the intrinsic dx.saturate is lowered to the appropriate DXIL op for half/float/double data types.
; CHECK-LABEL: test_saturate_half
-define noundef half @test_saturate_half(half noundef %p0) #0 {
+define noundef half @test_saturate_half(half noundef %p0) {
entry:
; CHECK: call half @dx.op.unary.f16(i32 7, half %p0)
%hlsl.saturate = call half @llvm.dx.saturate.f16(half %p0)
@@ -10,11 +10,8 @@ entry:
ret half %hlsl.saturate
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn
-declare half @llvm.dx.saturate.f16(half) #1
-
; CHECK-LABEL: test_saturate_float
-define noundef float @test_saturate_float(float noundef %p0) #0 {
+define noundef float @test_saturate_float(float noundef %p0) {
entry:
; CHECK: call float @dx.op.unary.f32(i32 7, float %p0)
%hlsl.saturate = call float @llvm.dx.saturate.f32(float %p0)
@@ -22,11 +19,8 @@ entry:
ret float %hlsl.saturate
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn
-declare float @llvm.dx.saturate.f32(float) #1
-
; CHECK-LABEL: test_saturate_double
-define noundef double @test_saturate_double(double noundef %p0) #0 {
+define noundef double @test_saturate_double(double noundef %p0) {
entry:
; CHECK: call double @dx.op.unary.f64(i32 7, double %p0)
%hlsl.saturate = call double @llvm.dx.saturate.f64(double %p0)
@@ -34,6 +28,7 @@ entry:
ret double %hlsl.saturate
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn
-declare double @llvm.dx.saturate.f64(double) #1
+declare half @llvm.dx.saturate.f16(half)
+declare float @llvm.dx.saturate.f32(float)
+declare double @llvm.dx.saturate.f64(double)
diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll
new file mode 100644
index 0000000..b970a28
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalar-store.ll
@@ -0,0 +1,17 @@
+; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
+
+@"sharedData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
+; CHECK-LABEL: store_test
+define void @store_test () local_unnamed_addr {
+ ; CHECK: store float 1.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 3.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 4.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 6.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+
+ store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"sharedData", align 16
+ store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"sharedData", i32 16), align 16
+ ret void
+ }
diff --git a/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll
new file mode 100644
index 0000000..a14c1de
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll
@@ -0,0 +1,25 @@
+; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
+
+; CHECK: target triple = "dxilv1.3-pc-shadermodel6.3-library"
+; CHECK-LABEL: cos_sin_float_test
+define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) {
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]])
+ ; CHECK: [[ie4:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie0]])
+ ; CHECK: [[ie5:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie1]])
+ ; CHECK: [[ie6:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie2]])
+ ; CHECK: [[ie7:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie4]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie5]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie6]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie7]], i64 3
+ %2 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %a)
+ %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2)
+ ret <4 x float> %3
+}
diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll
index f309a36..ac6b217 100644
--- a/llvm/test/CodeGen/DirectX/sin.ll
+++ b/llvm/test/CodeGen/DirectX/sin.ll
@@ -1,25 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for sin are generated for float and half.
-; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}})
-; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}})
-; Function Attrs: noinline nounwind optnone
-define noundef float @sin_float(float noundef %a) #0 {
+define noundef float @sin_float(float noundef %a) {
entry:
- %a.addr = alloca float, align 4
- store float %a, ptr %a.addr, align 4
- %0 = load float, ptr %a.addr, align 4
- %1 = call float @llvm.sin.f32(float %0)
+ ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}})
+ %1 = call float @llvm.sin.f32(float %a)
ret float %1
}
-; Function Attrs: noinline nounwind optnone
-define noundef half @sin_half(half noundef %a) #0 {
+define noundef half @sin_half(half noundef %a) {
entry:
- %a.addr = alloca half, align 2
- store half %a, ptr %a.addr, align 2
- %0 = load half, ptr %a.addr, align 2
- %1 = call half @llvm.sin.f16(half %0)
+ ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}})
+ %1 = call half @llvm.sin.f16(half %a)
ret half %1
}
+
+define noundef <4 x float> @sin_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.sin.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
+declare half @llvm.sin.f16(half)
+declare float @llvm.sin.f32(float)
+declare <4 x float> @llvm.sin.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/sinh.ll b/llvm/test/CodeGen/DirectX/sinh.ll
index d4d3eda..deba726e 100644
--- a/llvm/test/CodeGen/DirectX/sinh.ll
+++ b/llvm/test/CodeGen/DirectX/sinh.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for sinh are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @sinh_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 19, float %{{.*}})
%elt.sinh = call float @llvm.sinh.f32(float %a)
ret float %elt.sinh
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @sinh_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 19, half %{{.*}})
%elt.sinh = call half @llvm.sinh.f16(half %a)
ret half %elt.sinh
}
+define noundef <4 x float> @sinh_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.sinh.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.sinh.f16(half)
declare float @llvm.sinh.f32(float)
+declare <4 x float> @llvm.sinh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/sqrt.ll b/llvm/test/CodeGen/DirectX/sqrt.ll
index 792fbc8..e2955b4 100644
--- a/llvm/test/CodeGen/DirectX/sqrt.ll
+++ b/llvm/test/CodeGen/DirectX/sqrt.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for sqrt are generated for float and half.
-define noundef float @sqrt_float(float noundef %a) #0 {
+define noundef float @sqrt_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 24, float %{{.*}})
%elt.sqrt = call float @llvm.sqrt.f32(float %a)
ret float %elt.sqrt
}
-define noundef half @sqrt_half(half noundef %a) #0 {
+define noundef half @sqrt_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 24, half %{{.*}})
%elt.sqrt = call half @llvm.sqrt.f16(half %a)
ret half %elt.sqrt
}
+define noundef <4 x float> @sqrt_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.sqrt.f16(half)
declare float @llvm.sqrt.f32(float)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/step.ll b/llvm/test/CodeGen/DirectX/step.ll
new file mode 100644
index 0000000..0393c15
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/step.ll
@@ -0,0 +1,78 @@
+; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+
+; Make sure dxil operation function calls for step are generated for half/float.
+
+declare half @llvm.dx.step.f16(half, half)
+declare <2 x half> @llvm.dx.step.v2f16(<2 x half>, <2 x half>)
+declare <3 x half> @llvm.dx.step.v3f16(<3 x half>, <3 x half>)
+declare <4 x half> @llvm.dx.step.v4f16(<4 x half>, <4 x half>)
+
+declare float @llvm.dx.step.f32(float, float)
+declare <2 x float> @llvm.dx.step.v2f32(<2 x float>, <2 x float>)
+declare <3 x float> @llvm.dx.step.v3f32(<3 x float>, <3 x float>)
+declare <4 x float> @llvm.dx.step.v4f32(<4 x float>, <4 x float>)
+
+define noundef half @test_step_half(half noundef %p0, half noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt half %p1, %p0
+ ; CHECK: %1 = select i1 %0, half 0xH0000, half 0xH3C00
+ %hlsl.step = call half @llvm.dx.step.f16(half %p0, half %p1)
+ ret half %hlsl.step
+}
+
+define noundef <2 x half> @test_step_half2(<2 x half> noundef %p0, <2 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <2 x half> %p1, %p0
+ ; CHECK: %1 = select <2 x i1> %0, <2 x half> zeroinitializer, <2 x half> <half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <2 x half> @llvm.dx.step.v2f16(<2 x half> %p0, <2 x half> %p1)
+ ret <2 x half> %hlsl.step
+}
+
+define noundef <3 x half> @test_step_half3(<3 x half> noundef %p0, <3 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <3 x half> %p1, %p0
+ ; CHECK: %1 = select <3 x i1> %0, <3 x half> zeroinitializer, <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <3 x half> @llvm.dx.step.v3f16(<3 x half> %p0, <3 x half> %p1)
+ ret <3 x half> %hlsl.step
+}
+
+define noundef <4 x half> @test_step_half4(<4 x half> noundef %p0, <4 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <4 x half> %p1, %p0
+ ; CHECK: %1 = select <4 x i1> %0, <4 x half> zeroinitializer, <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <4 x half> @llvm.dx.step.v4f16(<4 x half> %p0, <4 x half> %p1)
+ ret <4 x half> %hlsl.step
+}
+
+define noundef float @test_step_float(float noundef %p0, float noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt float %p1, %p0
+ ; CHECK: %1 = select i1 %0, float 0.000000e+00, float 1.000000e+00
+ %hlsl.step = call float @llvm.dx.step.f32(float %p0, float %p1)
+ ret float %hlsl.step
+}
+
+define noundef <2 x float> @test_step_float2(<2 x float> noundef %p0, <2 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <2 x float> %p1, %p0
+ ; CHECK: %1 = select <2 x i1> %0, <2 x float> zeroinitializer, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %hlsl.step = call <2 x float> @llvm.dx.step.v2f32(<2 x float> %p0, <2 x float> %p1)
+ ret <2 x float> %hlsl.step
+}
+
+define noundef <3 x float> @test_step_float3(<3 x float> noundef %p0, <3 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <3 x float> %p1, %p0
+ ; CHECK: %1 = select <3 x i1> %0, <3 x float> zeroinitializer, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %hlsl.step = call <3 x float> @llvm.dx.step.v3f32(<3 x float> %p0, <3 x float> %p1)
+ ret <3 x float> %hlsl.step
+}
+
+define noundef <4 x float> @test_step_float4(<4 x float> noundef %p0, <4 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <4 x float> %p1, %p0
+ ; CHECK: %1 = select <4 x i1> %0, <4 x float> zeroinitializer, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %hlsl.step = call <4 x float> @llvm.dx.step.v4f32(<4 x float> %p0, <4 x float> %p1)
+ ret <4 x float> %hlsl.step
+}
diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll
index 6f7beb5..cf6965a 100644
--- a/llvm/test/CodeGen/DirectX/tan.ll
+++ b/llvm/test/CodeGen/DirectX/tan.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for tan are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.tan
}
+define noundef <4 x float> @tan_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.tan.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.tan.f16(half)
declare float @llvm.tan.f32(float)
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/tanh.ll b/llvm/test/CodeGen/DirectX/tanh.ll
index e6642d9..54ec6f2 100644
--- a/llvm/test/CodeGen/DirectX/tanh.ll
+++ b/llvm/test/CodeGen/DirectX/tanh.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for tanh are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.tanh
}
+define noundef <4 x float> @tanh_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.tanh.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.tanh.f16(half)
declare float @llvm.tanh.f32(float)
+declare <4 x float> @llvm.tanh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/trunc.ll b/llvm/test/CodeGen/DirectX/trunc.ll
index f00b737..6d9c222 100644
--- a/llvm/test/CodeGen/DirectX/trunc.ll
+++ b/llvm/test/CodeGen/DirectX/trunc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for trunc are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.trunc
}
+define noundef <4 x float> @trunc_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.trunc.f16(half)
declare float @llvm.trunc.f32(float)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
index 75f920b..03965ac 100644
--- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind {
define half @test_fadd_reg(half %a, half %b) nounwind {
; LA32-LABEL: test_fadd_reg:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $sp, $sp, -32
-; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: move $a0, $a1
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
; LA32-NEXT: fmov.s $fs0, $fa0
-; LA32-NEXT: fmov.s $fa0, $fa1
-; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fmov.s $fs1, $fa0
-; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: fadd.s $fa0, $fa0, $fs0
; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fadd.s $fa0, $fa0, $fs1
-; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
;
; LA64-LABEL: test_fadd_reg:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -32
; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: move $a0, $a1
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
; LA64-NEXT: fmov.s $fs0, $fa0
-; LA64-NEXT: fmov.s $fa0, $fa1
-; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: move $a0, $fp
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fmov.s $fs1, $fa0
-; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: fadd.s $fa0, $fa0, $fs0
; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fadd.s $fa0, $fa0, $fs1
-; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: ret
@@ -177,16 +175,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT: move $fp, $a1
-; LA32-NEXT: move $s0, $a0
-; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: ld.hu $s0, $a0, 0
+; LA32-NEXT: ld.hu $a0, $a1, 0
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
; LA32-NEXT: fmov.s $fs0, $fa0
-; LA32-NEXT: ld.hu $a0, $fp, 0
+; LA32-NEXT: move $a0, $s0
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fadd.s $fa0, $fs0, $fa0
+; LA32-NEXT: fadd.s $fa0, $fa0, $fs0
; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT: st.h $a0, $s0, 0
+; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
@@ -201,16 +199,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
-; LA64-NEXT: move $fp, $a1
-; LA64-NEXT: move $s0, $a0
-; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: ld.hu $s0, $a0, 0
+; LA64-NEXT: ld.hu $a0, $a1, 0
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
; LA64-NEXT: fmov.s $fs0, $fa0
-; LA64-NEXT: ld.hu $a0, $fp, 0
+; LA64-NEXT: move $a0, $s0
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fadd.s $fa0, $fs0, $fa0
+; LA64-NEXT: fadd.s $fa0, $fa0, $fs0
; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT: st.h $a0, $s0, 0
+; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -227,42 +225,40 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
define half @test_fmul_reg(half %a, half %b) nounwind {
; LA32-LABEL: test_fmul_reg:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $sp, $sp, -32
-; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: move $a0, $a1
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
; LA32-NEXT: fmov.s $fs0, $fa0
-; LA32-NEXT: fmov.s $fa0, $fa1
-; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fmov.s $fs1, $fa0
-; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: fmul.s $fa0, $fa0, $fs0
; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fmul.s $fa0, $fa0, $fs1
-; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
;
; LA64-LABEL: test_fmul_reg:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -32
; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: move $a0, $a1
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
; LA64-NEXT: fmov.s $fs0, $fa0
-; LA64-NEXT: fmov.s $fa0, $fa1
-; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: move $a0, $fp
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fmov.s $fs1, $fa0
-; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: fmul.s $fa0, $fa0, $fs0
; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fmul.s $fa0, $fa0, $fs1
-; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: ret
@@ -278,16 +274,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT: move $fp, $a1
-; LA32-NEXT: move $s0, $a0
-; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: ld.hu $s0, $a0, 0
+; LA32-NEXT: ld.hu $a0, $a1, 0
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
; LA32-NEXT: fmov.s $fs0, $fa0
-; LA32-NEXT: ld.hu $a0, $fp, 0
+; LA32-NEXT: move $a0, $s0
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fmul.s $fa0, $fs0, $fa0
+; LA32-NEXT: fmul.s $fa0, $fa0, $fs0
; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT: st.h $a0, $s0, 0
+; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
@@ -302,16 +298,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
-; LA64-NEXT: move $fp, $a1
-; LA64-NEXT: move $s0, $a0
-; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: ld.hu $s0, $a0, 0
+; LA64-NEXT: ld.hu $a0, $a1, 0
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
; LA64-NEXT: fmov.s $fs0, $fa0
-; LA64-NEXT: ld.hu $a0, $fp, 0
+; LA64-NEXT: move $a0, $s0
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fmul.s $fa0, $fs0, $fa0
+; LA64-NEXT: fmul.s $fa0, $fa0, $fs0
; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT: st.h $a0, $s0, 0
+; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -324,3 +320,61 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
store half %r, ptr %p
ret void
}
+
+define half @freeze_half_undef() nounwind {
+; LA32-LABEL: freeze_half_undef:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: movgr2fr.w $fa0, $zero
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fadd.s $fa0, $fa0, $fa0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: freeze_half_undef:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: movgr2fr.w $fa0, $zero
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fadd.s $fa0, $fa0, $fa0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %y1 = freeze half undef
+ %t1 = fadd half %y1, %y1
+ ret half %t1
+}
+
+define half @freeze_half_poison(half %maybe.poison) nounwind {
+; LA32-LABEL: freeze_half_poison:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fadd.s $fa0, $fa0, $fa0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: freeze_half_poison:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fadd.s $fa0, $fa0, $fa0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %y1 = freeze half %maybe.poison
+ %t1 = fadd half %y1, %y1
+ ret half %t1
+}
diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
index b1c34c8..400184a 100644
--- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s
+; RUN: opt < %s -S -nvptx-lower-alloca | FileCheck %s --check-prefix LOWERALLOCAONLY
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
@@ -11,13 +12,32 @@ define void @kernel() {
%A = alloca i32
; CHECK: addrspacecast ptr %A to ptr addrspace(5)
; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
+; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr %A to ptr addrspace(5)
+; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) [[V1]] to ptr
+; LOWERALLOCAONLY: store i32 0, ptr [[V2]], align 4
; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
store i32 0, ptr %A
call void @callee(ptr %A)
ret void
}
+define void @alloca_in_explicit_local_as() {
+; LABEL: @lower_alloca_addrspace5
+; PTX-LABEL: .visible .func alloca_in_explicit_local_as(
+ %A = alloca i32, addrspace(5)
+; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
+; PTX: st.local.u32 [%SP+0], {{%r[0-9]+}}
+; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr
+; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4
+ store i32 0, ptr addrspace(5) %A
+ call void @callee(ptr addrspace(5) %A)
+ ret void
+}
+
declare void @callee(ptr)
+declare void @callee_addrspace5(ptr addrspace(5))
!nvvm.annotations = !{!0}
+!nvvm.annotations = !{!1}
!0 = !{ptr @kernel, !"kernel", i32 1}
+!1 = !{ptr @alloca_in_explicit_local_as, !"alloca_in_explicit_local_as", i32 1}
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
index e864d8f..949668f 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
@@ -68,18 +68,18 @@ define i32 @fcmp_ogt(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_ogt:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a5, fflags
+; RV32IZFINXZDINX-NEXT: frflags a5
; RV32IZFINXZDINX-NEXT: flt.d a4, a2, a0
-; RV32IZFINXZDINX-NEXT: csrw fflags, a5
+; RV32IZFINXZDINX-NEXT: fsflags a5
; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0
; RV32IZFINXZDINX-NEXT: mv a0, a4
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: fcmp_ogt:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a3, fflags
+; RV64IZFINXZDINX-NEXT: frflags a3
; RV64IZFINXZDINX-NEXT: flt.d a2, a1, a0
-; RV64IZFINXZDINX-NEXT: csrw fflags, a3
+; RV64IZFINXZDINX-NEXT: fsflags a3
; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0
; RV64IZFINXZDINX-NEXT: mv a0, a2
; RV64IZFINXZDINX-NEXT: ret
@@ -119,18 +119,18 @@ define i32 @fcmp_oge(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_oge:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a5, fflags
+; RV32IZFINXZDINX-NEXT: frflags a5
; RV32IZFINXZDINX-NEXT: fle.d a4, a2, a0
-; RV32IZFINXZDINX-NEXT: csrw fflags, a5
+; RV32IZFINXZDINX-NEXT: fsflags a5
; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0
; RV32IZFINXZDINX-NEXT: mv a0, a4
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: fcmp_oge:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a3, fflags
+; RV64IZFINXZDINX-NEXT: frflags a3
; RV64IZFINXZDINX-NEXT: fle.d a2, a1, a0
-; RV64IZFINXZDINX-NEXT: csrw fflags, a3
+; RV64IZFINXZDINX-NEXT: fsflags a3
; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0
; RV64IZFINXZDINX-NEXT: mv a0, a2
; RV64IZFINXZDINX-NEXT: ret
@@ -172,18 +172,18 @@ define i32 @fcmp_olt(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_olt:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a5, fflags
+; RV32IZFINXZDINX-NEXT: frflags a5
; RV32IZFINXZDINX-NEXT: flt.d a4, a0, a2
-; RV32IZFINXZDINX-NEXT: csrw fflags, a5
+; RV32IZFINXZDINX-NEXT: fsflags a5
; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2
; RV32IZFINXZDINX-NEXT: mv a0, a4
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: fcmp_olt:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a3, fflags
+; RV64IZFINXZDINX-NEXT: frflags a3
; RV64IZFINXZDINX-NEXT: flt.d a2, a0, a1
-; RV64IZFINXZDINX-NEXT: csrw fflags, a3
+; RV64IZFINXZDINX-NEXT: fsflags a3
; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1
; RV64IZFINXZDINX-NEXT: mv a0, a2
; RV64IZFINXZDINX-NEXT: ret
@@ -223,18 +223,18 @@ define i32 @fcmp_ole(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_ole:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a5, fflags
+; RV32IZFINXZDINX-NEXT: frflags a5
; RV32IZFINXZDINX-NEXT: fle.d a4, a0, a2
-; RV32IZFINXZDINX-NEXT: csrw fflags, a5
+; RV32IZFINXZDINX-NEXT: fsflags a5
; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2
; RV32IZFINXZDINX-NEXT: mv a0, a4
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: fcmp_ole:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a3, fflags
+; RV64IZFINXZDINX-NEXT: frflags a3
; RV64IZFINXZDINX-NEXT: fle.d a2, a0, a1
-; RV64IZFINXZDINX-NEXT: csrw fflags, a3
+; RV64IZFINXZDINX-NEXT: fsflags a3
; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1
; RV64IZFINXZDINX-NEXT: mv a0, a2
; RV64IZFINXZDINX-NEXT: ret
@@ -281,13 +281,13 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_one:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a4, fflags
+; RV32IZFINXZDINX-NEXT: frflags a4
; RV32IZFINXZDINX-NEXT: flt.d a5, a0, a2
-; RV32IZFINXZDINX-NEXT: csrw fflags, a4
+; RV32IZFINXZDINX-NEXT: fsflags a4
; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2
-; RV32IZFINXZDINX-NEXT: csrr a4, fflags
+; RV32IZFINXZDINX-NEXT: frflags a4
; RV32IZFINXZDINX-NEXT: flt.d a6, a2, a0
-; RV32IZFINXZDINX-NEXT: csrw fflags, a4
+; RV32IZFINXZDINX-NEXT: fsflags a4
; RV32IZFINXZDINX-NEXT: or a4, a6, a5
; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0
; RV32IZFINXZDINX-NEXT: mv a0, a4
@@ -295,13 +295,13 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp {
;
; RV64IZFINXZDINX-LABEL: fcmp_one:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a2, fflags
+; RV64IZFINXZDINX-NEXT: frflags a2
; RV64IZFINXZDINX-NEXT: flt.d a3, a0, a1
-; RV64IZFINXZDINX-NEXT: csrw fflags, a2
+; RV64IZFINXZDINX-NEXT: fsflags a2
; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1
-; RV64IZFINXZDINX-NEXT: csrr a2, fflags
+; RV64IZFINXZDINX-NEXT: frflags a2
; RV64IZFINXZDINX-NEXT: flt.d a4, a1, a0
-; RV64IZFINXZDINX-NEXT: csrw fflags, a2
+; RV64IZFINXZDINX-NEXT: fsflags a2
; RV64IZFINXZDINX-NEXT: or a2, a4, a3
; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0
; RV64IZFINXZDINX-NEXT: mv a0, a2
@@ -430,13 +430,13 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_ueq:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a4, fflags
+; RV32IZFINXZDINX-NEXT: frflags a4
; RV32IZFINXZDINX-NEXT: flt.d a5, a0, a2
-; RV32IZFINXZDINX-NEXT: csrw fflags, a4
+; RV32IZFINXZDINX-NEXT: fsflags a4
; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2
-; RV32IZFINXZDINX-NEXT: csrr a4, fflags
+; RV32IZFINXZDINX-NEXT: frflags a4
; RV32IZFINXZDINX-NEXT: flt.d a6, a2, a0
-; RV32IZFINXZDINX-NEXT: csrw fflags, a4
+; RV32IZFINXZDINX-NEXT: fsflags a4
; RV32IZFINXZDINX-NEXT: or a4, a6, a5
; RV32IZFINXZDINX-NEXT: xori a4, a4, 1
; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0
@@ -445,13 +445,13 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
;
; RV64IZFINXZDINX-LABEL: fcmp_ueq:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a2, fflags
+; RV64IZFINXZDINX-NEXT: frflags a2
; RV64IZFINXZDINX-NEXT: flt.d a3, a0, a1
-; RV64IZFINXZDINX-NEXT: csrw fflags, a2
+; RV64IZFINXZDINX-NEXT: fsflags a2
; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1
-; RV64IZFINXZDINX-NEXT: csrr a2, fflags
+; RV64IZFINXZDINX-NEXT: frflags a2
; RV64IZFINXZDINX-NEXT: flt.d a4, a1, a0
-; RV64IZFINXZDINX-NEXT: csrw fflags, a2
+; RV64IZFINXZDINX-NEXT: fsflags a2
; RV64IZFINXZDINX-NEXT: or a3, a4, a3
; RV64IZFINXZDINX-NEXT: xori a2, a3, 1
; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0
@@ -528,9 +528,9 @@ define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_ugt:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a4, fflags
+; RV32IZFINXZDINX-NEXT: frflags a4
; RV32IZFINXZDINX-NEXT: fle.d a5, a0, a2
-; RV32IZFINXZDINX-NEXT: csrw fflags, a4
+; RV32IZFINXZDINX-NEXT: fsflags a4
; RV32IZFINXZDINX-NEXT: xori a4, a5, 1
; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2
; RV32IZFINXZDINX-NEXT: mv a0, a4
@@ -538,9 +538,9 @@ define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp {
;
; RV64IZFINXZDINX-LABEL: fcmp_ugt:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a2, fflags
+; RV64IZFINXZDINX-NEXT: frflags a2
; RV64IZFINXZDINX-NEXT: fle.d a3, a0, a1
-; RV64IZFINXZDINX-NEXT: csrw fflags, a2
+; RV64IZFINXZDINX-NEXT: fsflags a2
; RV64IZFINXZDINX-NEXT: xori a2, a3, 1
; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1
; RV64IZFINXZDINX-NEXT: mv a0, a2
@@ -582,9 +582,9 @@ define i32 @fcmp_uge(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_uge:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a4, fflags
+; RV32IZFINXZDINX-NEXT: frflags a4
; RV32IZFINXZDINX-NEXT: flt.d a5, a0, a2
-; RV32IZFINXZDINX-NEXT: csrw fflags, a4
+; RV32IZFINXZDINX-NEXT: fsflags a4
; RV32IZFINXZDINX-NEXT: xori a4, a5, 1
; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2
; RV32IZFINXZDINX-NEXT: mv a0, a4
@@ -592,9 +592,9 @@ define i32 @fcmp_uge(double %a, double %b) nounwind strictfp {
;
; RV64IZFINXZDINX-LABEL: fcmp_uge:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a2, fflags
+; RV64IZFINXZDINX-NEXT: frflags a2
; RV64IZFINXZDINX-NEXT: flt.d a3, a0, a1
-; RV64IZFINXZDINX-NEXT: csrw fflags, a2
+; RV64IZFINXZDINX-NEXT: fsflags a2
; RV64IZFINXZDINX-NEXT: xori a2, a3, 1
; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1
; RV64IZFINXZDINX-NEXT: mv a0, a2
@@ -638,9 +638,9 @@ define i32 @fcmp_ult(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_ult:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a4, fflags
+; RV32IZFINXZDINX-NEXT: frflags a4
; RV32IZFINXZDINX-NEXT: fle.d a5, a2, a0
-; RV32IZFINXZDINX-NEXT: csrw fflags, a4
+; RV32IZFINXZDINX-NEXT: fsflags a4
; RV32IZFINXZDINX-NEXT: xori a4, a5, 1
; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0
; RV32IZFINXZDINX-NEXT: mv a0, a4
@@ -648,9 +648,9 @@ define i32 @fcmp_ult(double %a, double %b) nounwind strictfp {
;
; RV64IZFINXZDINX-LABEL: fcmp_ult:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a2, fflags
+; RV64IZFINXZDINX-NEXT: frflags a2
; RV64IZFINXZDINX-NEXT: fle.d a3, a1, a0
-; RV64IZFINXZDINX-NEXT: csrw fflags, a2
+; RV64IZFINXZDINX-NEXT: fsflags a2
; RV64IZFINXZDINX-NEXT: xori a2, a3, 1
; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0
; RV64IZFINXZDINX-NEXT: mv a0, a2
@@ -692,9 +692,9 @@ define i32 @fcmp_ule(double %a, double %b) nounwind strictfp {
;
; RV32IZFINXZDINX-LABEL: fcmp_ule:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: csrr a4, fflags
+; RV32IZFINXZDINX-NEXT: frflags a4
; RV32IZFINXZDINX-NEXT: flt.d a5, a2, a0
-; RV32IZFINXZDINX-NEXT: csrw fflags, a4
+; RV32IZFINXZDINX-NEXT: fsflags a4
; RV32IZFINXZDINX-NEXT: xori a4, a5, 1
; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0
; RV32IZFINXZDINX-NEXT: mv a0, a4
@@ -702,9 +702,9 @@ define i32 @fcmp_ule(double %a, double %b) nounwind strictfp {
;
; RV64IZFINXZDINX-LABEL: fcmp_ule:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: csrr a2, fflags
+; RV64IZFINXZDINX-NEXT: frflags a2
; RV64IZFINXZDINX-NEXT: flt.d a3, a1, a0
-; RV64IZFINXZDINX-NEXT: csrw fflags, a2
+; RV64IZFINXZDINX-NEXT: fsflags a2
; RV64IZFINXZDINX-NEXT: xori a2, a3, 1
; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0
; RV64IZFINXZDINX-NEXT: mv a0, a2
diff --git a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
index dae9f3e..0cbfc96 100644
--- a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
@@ -63,9 +63,9 @@ define i32 @fcmp_ogt(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_ogt:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a3, fflags
+; CHECKIZFINX-NEXT: frflags a3
; CHECKIZFINX-NEXT: flt.s a2, a1, a0
-; CHECKIZFINX-NEXT: csrw fflags, a3
+; CHECKIZFINX-NEXT: fsflags a3
; CHECKIZFINX-NEXT: feq.s zero, a1, a0
; CHECKIZFINX-NEXT: mv a0, a2
; CHECKIZFINX-NEXT: ret
@@ -105,9 +105,9 @@ define i32 @fcmp_oge(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_oge:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a3, fflags
+; CHECKIZFINX-NEXT: frflags a3
; CHECKIZFINX-NEXT: fle.s a2, a1, a0
-; CHECKIZFINX-NEXT: csrw fflags, a3
+; CHECKIZFINX-NEXT: fsflags a3
; CHECKIZFINX-NEXT: feq.s zero, a1, a0
; CHECKIZFINX-NEXT: mv a0, a2
; CHECKIZFINX-NEXT: ret
@@ -149,9 +149,9 @@ define i32 @fcmp_olt(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_olt:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a3, fflags
+; CHECKIZFINX-NEXT: frflags a3
; CHECKIZFINX-NEXT: flt.s a2, a0, a1
-; CHECKIZFINX-NEXT: csrw fflags, a3
+; CHECKIZFINX-NEXT: fsflags a3
; CHECKIZFINX-NEXT: feq.s zero, a0, a1
; CHECKIZFINX-NEXT: mv a0, a2
; CHECKIZFINX-NEXT: ret
@@ -191,9 +191,9 @@ define i32 @fcmp_ole(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_ole:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a3, fflags
+; CHECKIZFINX-NEXT: frflags a3
; CHECKIZFINX-NEXT: fle.s a2, a0, a1
-; CHECKIZFINX-NEXT: csrw fflags, a3
+; CHECKIZFINX-NEXT: fsflags a3
; CHECKIZFINX-NEXT: feq.s zero, a0, a1
; CHECKIZFINX-NEXT: mv a0, a2
; CHECKIZFINX-NEXT: ret
@@ -240,13 +240,13 @@ define i32 @fcmp_one(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_one:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a2, fflags
+; CHECKIZFINX-NEXT: frflags a2
; CHECKIZFINX-NEXT: flt.s a3, a0, a1
-; CHECKIZFINX-NEXT: csrw fflags, a2
+; CHECKIZFINX-NEXT: fsflags a2
; CHECKIZFINX-NEXT: feq.s zero, a0, a1
-; CHECKIZFINX-NEXT: csrr a2, fflags
+; CHECKIZFINX-NEXT: frflags a2
; CHECKIZFINX-NEXT: flt.s a4, a1, a0
-; CHECKIZFINX-NEXT: csrw fflags, a2
+; CHECKIZFINX-NEXT: fsflags a2
; CHECKIZFINX-NEXT: or a2, a4, a3
; CHECKIZFINX-NEXT: feq.s zero, a1, a0
; CHECKIZFINX-NEXT: mv a0, a2
@@ -360,13 +360,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_ueq:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a2, fflags
+; CHECKIZFINX-NEXT: frflags a2
; CHECKIZFINX-NEXT: flt.s a3, a0, a1
-; CHECKIZFINX-NEXT: csrw fflags, a2
+; CHECKIZFINX-NEXT: fsflags a2
; CHECKIZFINX-NEXT: feq.s zero, a0, a1
-; CHECKIZFINX-NEXT: csrr a2, fflags
+; CHECKIZFINX-NEXT: frflags a2
; CHECKIZFINX-NEXT: flt.s a4, a1, a0
-; CHECKIZFINX-NEXT: csrw fflags, a2
+; CHECKIZFINX-NEXT: fsflags a2
; CHECKIZFINX-NEXT: or a3, a4, a3
; CHECKIZFINX-NEXT: xori a2, a3, 1
; CHECKIZFINX-NEXT: feq.s zero, a1, a0
@@ -435,9 +435,9 @@ define i32 @fcmp_ugt(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_ugt:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a2, fflags
+; CHECKIZFINX-NEXT: frflags a2
; CHECKIZFINX-NEXT: fle.s a3, a0, a1
-; CHECKIZFINX-NEXT: csrw fflags, a2
+; CHECKIZFINX-NEXT: fsflags a2
; CHECKIZFINX-NEXT: xori a2, a3, 1
; CHECKIZFINX-NEXT: feq.s zero, a0, a1
; CHECKIZFINX-NEXT: mv a0, a2
@@ -479,9 +479,9 @@ define i32 @fcmp_uge(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_uge:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a2, fflags
+; CHECKIZFINX-NEXT: frflags a2
; CHECKIZFINX-NEXT: flt.s a3, a0, a1
-; CHECKIZFINX-NEXT: csrw fflags, a2
+; CHECKIZFINX-NEXT: fsflags a2
; CHECKIZFINX-NEXT: xori a2, a3, 1
; CHECKIZFINX-NEXT: feq.s zero, a0, a1
; CHECKIZFINX-NEXT: mv a0, a2
@@ -525,9 +525,9 @@ define i32 @fcmp_ult(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_ult:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a2, fflags
+; CHECKIZFINX-NEXT: frflags a2
; CHECKIZFINX-NEXT: fle.s a3, a1, a0
-; CHECKIZFINX-NEXT: csrw fflags, a2
+; CHECKIZFINX-NEXT: fsflags a2
; CHECKIZFINX-NEXT: xori a2, a3, 1
; CHECKIZFINX-NEXT: feq.s zero, a1, a0
; CHECKIZFINX-NEXT: mv a0, a2
@@ -569,9 +569,9 @@ define i32 @fcmp_ule(float %a, float %b) nounwind strictfp {
;
; CHECKIZFINX-LABEL: fcmp_ule:
; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: csrr a2, fflags
+; CHECKIZFINX-NEXT: frflags a2
; CHECKIZFINX-NEXT: flt.s a3, a1, a0
-; CHECKIZFINX-NEXT: csrw fflags, a2
+; CHECKIZFINX-NEXT: fsflags a2
; CHECKIZFINX-NEXT: xori a2, a3, 1
; CHECKIZFINX-NEXT: feq.s zero, a1, a0
; CHECKIZFINX-NEXT: mv a0, a2
diff --git a/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll
index d96c39c..4bc595b 100644
--- a/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll
@@ -61,9 +61,9 @@ define i32 @fcmp_ogt(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_ogt:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a3, fflags
+; CHECKIZHINX-NEXT: frflags a3
; CHECKIZHINX-NEXT: flt.h a2, a1, a0
-; CHECKIZHINX-NEXT: csrw fflags, a3
+; CHECKIZHINX-NEXT: fsflags a3
; CHECKIZHINX-NEXT: feq.h zero, a1, a0
; CHECKIZHINX-NEXT: mv a0, a2
; CHECKIZHINX-NEXT: ret
@@ -80,9 +80,9 @@ define i32 @fcmp_ogt(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
-; CHECKIZHINXMIN-NEXT: csrr a3, fflags
+; CHECKIZHINXMIN-NEXT: frflags a3
; CHECKIZHINXMIN-NEXT: flt.s a0, a1, a2
-; CHECKIZHINXMIN-NEXT: csrw fflags, a3
+; CHECKIZHINXMIN-NEXT: fsflags a3
; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2
; CHECKIZHINXMIN-NEXT: ret
%1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp
@@ -101,9 +101,9 @@ define i32 @fcmp_oge(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_oge:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a3, fflags
+; CHECKIZHINX-NEXT: frflags a3
; CHECKIZHINX-NEXT: fle.h a2, a1, a0
-; CHECKIZHINX-NEXT: csrw fflags, a3
+; CHECKIZHINX-NEXT: fsflags a3
; CHECKIZHINX-NEXT: feq.h zero, a1, a0
; CHECKIZHINX-NEXT: mv a0, a2
; CHECKIZHINX-NEXT: ret
@@ -120,9 +120,9 @@ define i32 @fcmp_oge(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
-; CHECKIZHINXMIN-NEXT: csrr a3, fflags
+; CHECKIZHINXMIN-NEXT: frflags a3
; CHECKIZHINXMIN-NEXT: fle.s a0, a1, a2
-; CHECKIZHINXMIN-NEXT: csrw fflags, a3
+; CHECKIZHINXMIN-NEXT: fsflags a3
; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2
; CHECKIZHINXMIN-NEXT: ret
%1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") strictfp
@@ -141,9 +141,9 @@ define i32 @fcmp_olt(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_olt:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a3, fflags
+; CHECKIZHINX-NEXT: frflags a3
; CHECKIZHINX-NEXT: flt.h a2, a0, a1
-; CHECKIZHINX-NEXT: csrw fflags, a3
+; CHECKIZHINX-NEXT: fsflags a3
; CHECKIZHINX-NEXT: feq.h zero, a0, a1
; CHECKIZHINX-NEXT: mv a0, a2
; CHECKIZHINX-NEXT: ret
@@ -160,9 +160,9 @@ define i32 @fcmp_olt(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
-; CHECKIZHINXMIN-NEXT: csrr a3, fflags
+; CHECKIZHINXMIN-NEXT: frflags a3
; CHECKIZHINXMIN-NEXT: flt.s a0, a2, a1
-; CHECKIZHINXMIN-NEXT: csrw fflags, a3
+; CHECKIZHINXMIN-NEXT: fsflags a3
; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1
; CHECKIZHINXMIN-NEXT: ret
%1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") strictfp
@@ -181,9 +181,9 @@ define i32 @fcmp_ole(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_ole:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a3, fflags
+; CHECKIZHINX-NEXT: frflags a3
; CHECKIZHINX-NEXT: fle.h a2, a0, a1
-; CHECKIZHINX-NEXT: csrw fflags, a3
+; CHECKIZHINX-NEXT: fsflags a3
; CHECKIZHINX-NEXT: feq.h zero, a0, a1
; CHECKIZHINX-NEXT: mv a0, a2
; CHECKIZHINX-NEXT: ret
@@ -200,9 +200,9 @@ define i32 @fcmp_ole(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
-; CHECKIZHINXMIN-NEXT: csrr a3, fflags
+; CHECKIZHINXMIN-NEXT: frflags a3
; CHECKIZHINXMIN-NEXT: fle.s a0, a2, a1
-; CHECKIZHINXMIN-NEXT: csrw fflags, a3
+; CHECKIZHINXMIN-NEXT: fsflags a3
; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1
; CHECKIZHINXMIN-NEXT: ret
%1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") strictfp
@@ -228,13 +228,13 @@ define i32 @fcmp_one(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_one:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a2, fflags
+; CHECKIZHINX-NEXT: frflags a2
; CHECKIZHINX-NEXT: flt.h a3, a0, a1
-; CHECKIZHINX-NEXT: csrw fflags, a2
+; CHECKIZHINX-NEXT: fsflags a2
; CHECKIZHINX-NEXT: feq.h zero, a0, a1
-; CHECKIZHINX-NEXT: csrr a2, fflags
+; CHECKIZHINX-NEXT: frflags a2
; CHECKIZHINX-NEXT: flt.h a4, a1, a0
-; CHECKIZHINX-NEXT: csrw fflags, a2
+; CHECKIZHINX-NEXT: fsflags a2
; CHECKIZHINX-NEXT: or a2, a4, a3
; CHECKIZHINX-NEXT: feq.h zero, a1, a0
; CHECKIZHINX-NEXT: mv a0, a2
@@ -257,13 +257,13 @@ define i32 @fcmp_one(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
-; CHECKIZHINXMIN-NEXT: csrr a0, fflags
+; CHECKIZHINXMIN-NEXT: frflags a0
; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1
-; CHECKIZHINXMIN-NEXT: csrw fflags, a0
+; CHECKIZHINXMIN-NEXT: fsflags a0
; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1
-; CHECKIZHINXMIN-NEXT: csrr a0, fflags
+; CHECKIZHINXMIN-NEXT: frflags a0
; CHECKIZHINXMIN-NEXT: flt.s a4, a1, a2
-; CHECKIZHINXMIN-NEXT: csrw fflags, a0
+; CHECKIZHINXMIN-NEXT: fsflags a0
; CHECKIZHINXMIN-NEXT: or a0, a4, a3
; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2
; CHECKIZHINXMIN-NEXT: ret
@@ -326,13 +326,13 @@ define i32 @fcmp_ueq(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_ueq:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a2, fflags
+; CHECKIZHINX-NEXT: frflags a2
; CHECKIZHINX-NEXT: flt.h a3, a0, a1
-; CHECKIZHINX-NEXT: csrw fflags, a2
+; CHECKIZHINX-NEXT: fsflags a2
; CHECKIZHINX-NEXT: feq.h zero, a0, a1
-; CHECKIZHINX-NEXT: csrr a2, fflags
+; CHECKIZHINX-NEXT: frflags a2
; CHECKIZHINX-NEXT: flt.h a4, a1, a0
-; CHECKIZHINX-NEXT: csrw fflags, a2
+; CHECKIZHINX-NEXT: fsflags a2
; CHECKIZHINX-NEXT: or a3, a4, a3
; CHECKIZHINX-NEXT: xori a2, a3, 1
; CHECKIZHINX-NEXT: feq.h zero, a1, a0
@@ -357,13 +357,13 @@ define i32 @fcmp_ueq(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
-; CHECKIZHINXMIN-NEXT: csrr a0, fflags
+; CHECKIZHINXMIN-NEXT: frflags a0
; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1
-; CHECKIZHINXMIN-NEXT: csrw fflags, a0
+; CHECKIZHINXMIN-NEXT: fsflags a0
; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1
-; CHECKIZHINXMIN-NEXT: csrr a0, fflags
+; CHECKIZHINXMIN-NEXT: frflags a0
; CHECKIZHINXMIN-NEXT: flt.s a4, a1, a2
-; CHECKIZHINXMIN-NEXT: csrw fflags, a0
+; CHECKIZHINXMIN-NEXT: fsflags a0
; CHECKIZHINXMIN-NEXT: or a3, a4, a3
; CHECKIZHINXMIN-NEXT: xori a0, a3, 1
; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2
@@ -385,9 +385,9 @@ define i32 @fcmp_ugt(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_ugt:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a2, fflags
+; CHECKIZHINX-NEXT: frflags a2
; CHECKIZHINX-NEXT: fle.h a3, a0, a1
-; CHECKIZHINX-NEXT: csrw fflags, a2
+; CHECKIZHINX-NEXT: fsflags a2
; CHECKIZHINX-NEXT: xori a2, a3, 1
; CHECKIZHINX-NEXT: feq.h zero, a0, a1
; CHECKIZHINX-NEXT: mv a0, a2
@@ -406,9 +406,9 @@ define i32 @fcmp_ugt(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
-; CHECKIZHINXMIN-NEXT: csrr a0, fflags
+; CHECKIZHINXMIN-NEXT: frflags a0
; CHECKIZHINXMIN-NEXT: fle.s a3, a2, a1
-; CHECKIZHINXMIN-NEXT: csrw fflags, a0
+; CHECKIZHINXMIN-NEXT: fsflags a0
; CHECKIZHINXMIN-NEXT: xori a0, a3, 1
; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1
; CHECKIZHINXMIN-NEXT: ret
@@ -429,9 +429,9 @@ define i32 @fcmp_uge(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_uge:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a2, fflags
+; CHECKIZHINX-NEXT: frflags a2
; CHECKIZHINX-NEXT: flt.h a3, a0, a1
-; CHECKIZHINX-NEXT: csrw fflags, a2
+; CHECKIZHINX-NEXT: fsflags a2
; CHECKIZHINX-NEXT: xori a2, a3, 1
; CHECKIZHINX-NEXT: feq.h zero, a0, a1
; CHECKIZHINX-NEXT: mv a0, a2
@@ -450,9 +450,9 @@ define i32 @fcmp_uge(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
-; CHECKIZHINXMIN-NEXT: csrr a0, fflags
+; CHECKIZHINXMIN-NEXT: frflags a0
; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1
-; CHECKIZHINXMIN-NEXT: csrw fflags, a0
+; CHECKIZHINXMIN-NEXT: fsflags a0
; CHECKIZHINXMIN-NEXT: xori a0, a3, 1
; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1
; CHECKIZHINXMIN-NEXT: ret
@@ -473,9 +473,9 @@ define i32 @fcmp_ult(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_ult:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a2, fflags
+; CHECKIZHINX-NEXT: frflags a2
; CHECKIZHINX-NEXT: fle.h a3, a1, a0
-; CHECKIZHINX-NEXT: csrw fflags, a2
+; CHECKIZHINX-NEXT: fsflags a2
; CHECKIZHINX-NEXT: xori a2, a3, 1
; CHECKIZHINX-NEXT: feq.h zero, a1, a0
; CHECKIZHINX-NEXT: mv a0, a2
@@ -494,9 +494,9 @@ define i32 @fcmp_ult(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
-; CHECKIZHINXMIN-NEXT: csrr a0, fflags
+; CHECKIZHINXMIN-NEXT: frflags a0
; CHECKIZHINXMIN-NEXT: fle.s a3, a1, a2
-; CHECKIZHINXMIN-NEXT: csrw fflags, a0
+; CHECKIZHINXMIN-NEXT: fsflags a0
; CHECKIZHINXMIN-NEXT: xori a0, a3, 1
; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2
; CHECKIZHINXMIN-NEXT: ret
@@ -517,9 +517,9 @@ define i32 @fcmp_ule(half %a, half %b) nounwind strictfp {
;
; CHECKIZHINX-LABEL: fcmp_ule:
; CHECKIZHINX: # %bb.0:
-; CHECKIZHINX-NEXT: csrr a2, fflags
+; CHECKIZHINX-NEXT: frflags a2
; CHECKIZHINX-NEXT: flt.h a3, a1, a0
-; CHECKIZHINX-NEXT: csrw fflags, a2
+; CHECKIZHINX-NEXT: fsflags a2
; CHECKIZHINX-NEXT: xori a2, a3, 1
; CHECKIZHINX-NEXT: feq.h zero, a1, a0
; CHECKIZHINX-NEXT: mv a0, a2
@@ -538,9 +538,9 @@ define i32 @fcmp_ule(half %a, half %b) nounwind strictfp {
; CHECKIZHINXMIN: # %bb.0:
; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0
; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
-; CHECKIZHINXMIN-NEXT: csrr a0, fflags
+; CHECKIZHINXMIN-NEXT: frflags a0
; CHECKIZHINXMIN-NEXT: flt.s a3, a1, a2
-; CHECKIZHINXMIN-NEXT: csrw fflags, a0
+; CHECKIZHINXMIN-NEXT: fsflags a0
; CHECKIZHINXMIN-NEXT: xori a0, a3, 1
; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2
; CHECKIZHINXMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/xcvalu.ll b/llvm/test/CodeGen/RISCV/xcvalu.ll
index b103173..1ddfa10 100644
--- a/llvm/test/CodeGen/RISCV/xcvalu.ll
+++ b/llvm/test/CodeGen/RISCV/xcvalu.ll
@@ -20,7 +20,7 @@ define i32 @abs(i32 %a) {
define i1 @slet(i32 %a, i32 %b) {
; CHECK-LABEL: slet:
; CHECK: # %bb.0:
-; CHECK-NEXT: cv.slet a0, a0, a1
+; CHECK-NEXT: cv.sle a0, a0, a1
; CHECK-NEXT: ret
%1 = icmp sle i32 %a, %b
ret i1 %1
@@ -29,7 +29,7 @@ define i1 @slet(i32 %a, i32 %b) {
define i1 @sletu(i32 %a, i32 %b) {
; CHECK-LABEL: sletu:
; CHECK: # %bb.0:
-; CHECK-NEXT: cv.sletu a0, a0, a1
+; CHECK-NEXT: cv.sleu a0, a0, a1
; CHECK-NEXT: ret
%1 = icmp ule i32 %a, %b
ret i1 %1
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll
new file mode 100644
index 0000000..bb50d8c
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for step are lowered correctly.
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+
+define noundef <4 x half> @step_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
+entry:
+ ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]]
+ ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
+ ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]]
+ ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Step %[[#arg0]] %[[#arg1]]
+ %hlsl.step = call <4 x half> @llvm.spv.step.v4f16(<4 x half> %a, <4 x half> %b)
+ ret <4 x half> %hlsl.step
+}
+
+define noundef <4 x float> @step_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
+entry:
+ ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
+ ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+ ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
+ ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Step %[[#arg0]] %[[#arg1]]
+ %hlsl.step = call <4 x float> @llvm.spv.step.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %hlsl.step
+}
+
+declare <4 x half> @llvm.spv.step.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.spv.step.v4f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/MC/RISCV/corev/XCValu-valid.s b/llvm/test/MC/RISCV/corev/XCValu-valid.s
index 423dbba..1c74e36 100644
--- a/llvm/test/MC/RISCV/corev/XCValu-valid.s
+++ b/llvm/test/MC/RISCV/corev/XCValu-valid.s
@@ -36,15 +36,25 @@ cv.subrnr a0, a1, a2
# CHECK-ENCODING: [0x2b,0xb5,0xc5,0x8c]
# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}}
+cv.sle t0, t1, t2
+# CHECK-INSTR: cv.sle t0, t1, t2
+# CHECK-ENCODING: [0xab,0x32,0x73,0x52]
+# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}}
+
cv.slet t0, t1, t2
-# CHECK-INSTR: cv.slet t0, t1, t2
+# CHECK-INSTR: cv.sle t0, t1, t2
# CHECK-ENCODING: [0xab,0x32,0x73,0x52]
+# CHECK-NO-EXT: unrecognized instruction mnemonic
+
+cv.sle a0, a1, a2
+# CHECK-INSTR: cv.sle a0, a1, a2
+# CHECK-ENCODING: [0x2b,0xb5,0xc5,0x52]
# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}}
cv.slet a0, a1, a2
-# CHECK-INSTR: cv.slet a0, a1, a2
+# CHECK-INSTR: cv.sle a0, a1, a2
# CHECK-ENCODING: [0x2b,0xb5,0xc5,0x52]
-# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}}
+# CHECK-NO-EXT: unrecognized instruction mnemonic
cv.subrn t0, t1, t2, 0
# CHECK-INSTR: cv.subrn t0, t1, t2, 0
@@ -261,15 +271,25 @@ cv.extbs a0, a1
# CHECK-ENCODING: [0x2b,0xb5,0x05,0x64]
# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}}
+cv.sleu t0, t1, t2
+# CHECK-INSTR: cv.sleu t0, t1, t2
+# CHECK-ENCODING: [0xab,0x32,0x73,0x54]
+# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}}
+
cv.sletu t0, t1, t2
-# CHECK-INSTR: cv.sletu t0, t1, t2
+# CHECK-INSTR: cv.sleu t0, t1, t2
# CHECK-ENCODING: [0xab,0x32,0x73,0x54]
+# CHECK-NO-EXT: unrecognized instruction mnemonic
+
+cv.sleu a0, a1, a2
+# CHECK-INSTR: cv.sleu a0, a1, a2
+# CHECK-ENCODING: [0x2b,0xb5,0xc5,0x54]
# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}}
cv.sletu a0, a1, a2
-# CHECK-INSTR: cv.sletu a0, a1, a2
+# CHECK-INSTR: cv.sleu a0, a1, a2
# CHECK-ENCODING: [0x2b,0xb5,0xc5,0x54]
-# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}}
+# CHECK-NO-EXT: unrecognized instruction mnemonic
cv.min t0, t1, t2
# CHECK-INSTR: cv.min t0, t1, t2
diff --git a/llvm/test/MC/RISCV/csr-aliases.s b/llvm/test/MC/RISCV/csr-aliases.s
index 1d7032f..96eb96f 100644
--- a/llvm/test/MC/RISCV/csr-aliases.s
+++ b/llvm/test/MC/RISCV/csr-aliases.s
@@ -7,6 +7,9 @@
# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+f < %s \
# RUN: | llvm-objdump -d --mattr=+f - \
# RUN: | FileCheck -check-prefix=CHECK-EXT-F %s
+# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+zfinx < %s \
+# RUN: | llvm-objdump -d --mattr=+zfinx - \
+# RUN: | FileCheck -check-prefix=CHECK-EXT-F %s
# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=-f < %s \
# RUN: | llvm-objdump -d --mattr=+f - \
# RUN: | FileCheck -check-prefix=CHECK-EXT-F %s
@@ -26,6 +29,9 @@
# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+f < %s \
# RUN: | llvm-objdump -d --mattr=+f - \
# RUN: | FileCheck -check-prefix=CHECK-EXT-F %s
+# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+zfinx < %s \
+# RUN: | llvm-objdump -d --mattr=+zfinx - \
+# RUN: | FileCheck -check-prefix=CHECK-EXT-F %s
# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=-f < %s \
# RUN: | llvm-objdump -d --mattr=+f - \
# RUN: | FileCheck -check-prefix=CHECK-EXT-F %s
@@ -45,61 +51,61 @@ csrrs t0, 3, zero
# CHECK-INST: csrrw t1, fcsr, t2
# CHECK-ALIAS: fscsr t1, t2
-# CHECK-EXT-F-ON: fscsr t1, t2
+# CHECK-EXT-F: fscsr t1, t2
# CHECK-EXT-F-OFF: csrrw t1, fcsr, t2
csrrw t1, 3, t2
# CHECK-INST: csrrw zero, fcsr, t2
# CHECK-ALIAS: fscsr t2
-# CHECK-EXT-F-ON: fscsr t2
+# CHECK-EXT-F: fscsr t2
# CHECK-EXT-F-OFF: csrw fcsr, t2
csrrw zero, 3, t2
# CHECK-INST: csrrw zero, fcsr, t2
# CHECK-ALIAS: fscsr t2
-# CHECK-EXT-F-ON: fscsr t2
+# CHECK-EXT-F: fscsr t2
# CHECK-EXT-F-OFF: csrw fcsr, t2
csrrw zero, 3, t2
# CHECK-INST: csrrw t0, frm, zero
# CHECK-ALIAS: fsrm t0, zero
-# CHECK-EXT-F-ON: fsrm t0, zero
+# CHECK-EXT-F: fsrm t0, zero
# CHECK-EXT-F-OFF: csrrw t0, frm
csrrw t0, 2, zero
# CHECK-INST: csrrw t0, frm, t1
# CHECK-ALIAS: fsrm t0, t1
-# CHECK-EXT-F-ON: fsrm t0, t1
+# CHECK-EXT-F: fsrm t0, t1
# CHECK-EXT-F-OFF: csrrw t0, frm, t1
csrrw t0, 2, t1
# CHECK-INST: csrrwi t0, frm, 0x1f
# CHECK-ALIAS: fsrmi t0, 0x1f
-# CHECK-EXT-F-ON: fsrmi t0, 0x1f
+# CHECK-EXT-F: fsrmi t0, 0x1f
# CHECK-EXT-F-OFF: csrrwi t0, frm, 0x1f
csrrwi t0, 2, 31
# CHECK-INST: csrrwi zero, frm, 0x1f
# CHECK-ALIAS: fsrmi 0x1f
-# CHECK-EXT-F-ON: fsrmi 0x1f
+# CHECK-EXT-F: fsrmi 0x1f
# CHECK-EXT-F-OFF: csrwi frm, 0x1f
csrrwi zero, 2, 31
# CHECK-INST: csrrs t0, fflags, zero
# CHECK-ALIAS: frflags t0
-# CHECK-EXT-F-ON: frflags t0
+# CHECK-EXT-F: frflags t0
# CHECK-EXT-F-OFF: csrr t0, fflags
csrrs t0, 1, zero
# CHECK-INST: csrrw t0, fflags, t2
# CHECK-ALIAS: fsflags t0, t2
-# CHECK-EXT-F-ON: fsflags t0, t2
+# CHECK-EXT-F: fsflags t0, t2
# CHECK-EXT-F-OFF: csrrw t0, fflags, t2
csrrw t0, 1, t2
# CHECK-INST: csrrw zero, fflags, t2
# CHECK-ALIAS: fsflags t2
-# CHECK-EXT-F-ON: fsflags t2
+# CHECK-EXT-F: fsflags t2
# CHECK-EXT-F-OFF: csrw fflags, t2
csrrw zero, 1, t2
diff --git a/llvm/test/MC/RISCV/rv32ih-aliases-valid.s b/llvm/test/MC/RISCV/rv32ih-aliases-valid.s
index 85af8cf..7ae4b6c 100644
--- a/llvm/test/MC/RISCV/rv32ih-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rv32ih-aliases-valid.s
@@ -1,7 +1,7 @@
-# RUN: llvm-mc %s -triple=riscv32 -mattr=+h -riscv-no-aliases -show-encoding \
-# RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST,CHECK-ALIAS-INST %s
-# RUN: llvm-mc %s -triple=riscv64 -mattr=+h -riscv-no-aliases -show-encoding \
-# RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST,CHECK-ALIAS-INST %s
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+h \
+# RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-ALIAS-INST %s
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+h \
+# RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-ALIAS-INST %s
# RUN: llvm-mc -filetype=obj -mattr=+h -triple riscv32 < %s \
# RUN: | llvm-objdump --mattr=+h -M no-aliases -d - \
# RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-NOALIAS-INST %s
@@ -9,62 +9,48 @@
# RUN: | llvm-objdump --mattr=+h -M no-aliases -d - \
# RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-NOALIAS-INST %s
-# CHECK-ALIAS-INST: hfence.gvma
+# CHECK-ALIAS-INST: hfence.gvma{{$}}
# CHECK-NOALIAS-INST: hfence.gvma zero, zero
-# CHECK: encoding: [0x73,0x00,0x00,0x62]
hfence.gvma
-# CHECK-ALIAS-INST: hfence.gvma a0
+# CHECK-ALIAS-INST: hfence.gvma a0{{$}}
# CHECK-NOALIAS-INST: hfence.gvma a0, zero
-# CHECK: encoding: [0x73,0x00,0x05,0x62]
hfence.gvma a0
-# CHECK-ALIAS-INST: hfence.vvma
+# CHECK-ALIAS-INST: hfence.vvma{{$}}
# CHECK-NOALIAS-INST: hfence.vvma zero, zero
-# CHECK: encoding: [0x73,0x00,0x00,0x22]
hfence.vvma
-# CHECK-ALIAS-INST: hfence.vvma a0
+# CHECK-ALIAS-INST: hfence.vvma a0{{$}}
# CHECK-NOALIAS-INST: hfence.vvma a0, zero
-# CHECK: encoding: [0x73,0x00,0x05,0x22]
hfence.vvma a0
# CHECK-INST: hlv.b a0, (a1)
-# CHECK: encoding: [0x73,0xc5,0x05,0x60]
hlv.b a0, 0(a1)
# CHECK-INST: hlv.bu a0, (a1)
-# CHECK: encoding: [0x73,0xc5,0x15,0x60]
hlv.bu a0, 0(a1)
# CHECK-INST: hlv.h a1, (a2)
-# CHECK: encoding: [0xf3,0x45,0x06,0x64]
hlv.h a1, 0(a2)
# CHECK-INST: hlv.hu a1, (a1)
-# CHECK: encoding: [0xf3,0xc5,0x15,0x64]
hlv.hu a1, 0(a1)
# CHECK-INST: hlvx.hu a1, (a2)
-# CHECK: encoding: [0xf3,0x45,0x36,0x64]
hlvx.hu a1, 0(a2)
# CHECK-INST: hlv.w a2, (a2)
-# CHECK: encoding: [0x73,0x46,0x06,0x68]
hlv.w a2, 0(a2)
# CHECK-INST: hlvx.wu a2, (a3)
-# CHECK: encoding: [0x73,0xc6,0x36,0x68]
hlvx.wu a2, 0(a3)
# CHECK-INST: hsv.b a0, (a1)
-# CHECK: encoding: [0x73,0xc0,0xa5,0x62]
hsv.b a0, 0(a1)
# CHECK-INST: hsv.h a0, (a1)
-# CHECK: encoding: [0x73,0xc0,0xa5,0x66]
hsv.h a0, 0(a1)
# CHECK-INST: hsv.w a0, (a1)
-# CHECK: encoding: [0x73,0xc0,0xa5,0x6a]
hsv.w a0, 0(a1)
diff --git a/llvm/test/MC/RISCV/rvf-aliases-valid.s b/llvm/test/MC/RISCV/rvf-aliases-valid.s
index 31f931b..0430e2a 100644
--- a/llvm/test/MC/RISCV/rvf-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rvf-aliases-valid.s
@@ -48,7 +48,8 @@ fgt.s x4, f5, f6
fge.s x7, f8, f9
# The following instructions actually alias instructions from the base ISA.
-# However, it only makes sense to support them when the F extension is enabled.
+# However, it only makes sense to support them when the F or Zfinx extension is
+# enabled.
# CHECK-INST: csrrs t0, fcsr, zero
# CHECK-ALIAS: frcsr t0
frcsr x5
diff --git a/llvm/test/MC/RISCV/rvi-aliases-valid.s b/llvm/test/MC/RISCV/rvi-aliases-valid.s
index 9ac6a8a..ef05d12 100644
--- a/llvm/test/MC/RISCV/rvi-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rvi-aliases-valid.s
@@ -261,10 +261,10 @@ csrrs t0, 0xfff, 0x10
csrrc t0, 0x140, 0x11
# CHECK-S-OBJ-NOALIAS: sfence.vma zero, zero
-# CHECK-S-OBJ: sfence.vma
+# CHECK-S-OBJ: sfence.vma{{$}}
sfence.vma
# CHECK-S-OBJ-NOALIAS: sfence.vma a0, zero
-# CHECK-S-OBJ: sfence.vma a0
+# CHECK-S-OBJ: sfence.vma a0{{$}}
sfence.vma a0
# The following aliases are accepted as input but the canonical form
diff --git a/llvm/test/MC/RISCV/rvzfinx-aliases-valid.s b/llvm/test/MC/RISCV/rvzfinx-aliases-valid.s
index f624c17..f9225cf 100644
--- a/llvm/test/MC/RISCV/rvzfinx-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rvzfinx-aliases-valid.s
@@ -7,16 +7,16 @@
# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfinx \
# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s
# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+zfinx %s \
-# RUN: | llvm-objdump -d --mattr=+zfinx -M no-aliases - \
+# RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+zfinx -M no-aliases - \
# RUN: | FileCheck -check-prefix=CHECK-INST %s
# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+zfinx %s \
-# RUN: | llvm-objdump -d --mattr=+zfinx - \
+# RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+zfinx - \
# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s
# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+zfinx %s \
-# RUN: | llvm-objdump -d --mattr=+zfinx -M no-aliases - \
+# RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+zfinx -M no-aliases - \
# RUN: | FileCheck -check-prefix=CHECK-INST %s
# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+zfinx %s \
-# RUN: | llvm-objdump -d --mattr=+zfinx - \
+# RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+zfinx - \
# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s
##===----------------------------------------------------------------------===##
@@ -40,6 +40,63 @@ fgt.s x4, s5, s6
# CHECK-ALIAS: fle.s t2, s1, s0
fge.s x7, x8, x9
+# The following instructions actually alias instructions from the base ISA.
+# However, it only makes sense to support them when the F or Zfinx extension is
+# enabled.
+# CHECK-INST: csrrs t0, fcsr, zero
+# CHECK-ALIAS: frcsr t0
+frcsr x5
+# CHECK-INST: csrrw t1, fcsr, t2
+# CHECK-ALIAS: fscsr t1, t2
+fscsr x6, x7
+# CHECK-INST: csrrw zero, fcsr, t3
+# CHECK-ALIAS: fscsr t3
+fscsr x28
+
+# These are obsolete aliases of frcsr/fscsr. They are accepted by the assembler
+# but the disassembler should always print them as the equivalent, new aliases.
+# CHECK-INST: csrrs t4, fcsr, zero
+# CHECK-ALIAS: frcsr t4
+frsr x29
+# CHECK-INST: csrrw t5, fcsr, t6
+# CHECK-ALIAS: fscsr t5, t6
+fssr x30, x31
+# CHECK-INST: csrrw zero, fcsr, s0
+# CHECK-ALIAS: fscsr s0
+fssr x8
+
+# CHECK-INST: csrrs t4, frm, zero
+# CHECK-ALIAS: frrm t4
+frrm x29
+# CHECK-INST: csrrw t5, frm, t4
+# CHECK-ALIAS: fsrm t5, t4
+fsrm x30, x29
+# CHECK-INST: csrrw zero, frm, t6
+# CHECK-ALIAS: fsrm t6
+fsrm x31
+# CHECK-INST: csrrwi a0, frm, 31
+# CHECK-ALIAS: fsrmi a0, 31
+fsrmi x10, 0x1f
+# CHECK-INST: csrrwi zero, frm, 30
+# CHECK-ALIAS: fsrmi 30
+fsrmi 0x1e
+
+# CHECK-INST: csrrs a1, fflags, zero
+# CHECK-ALIAS: frflags a1
+frflags x11
+# CHECK-INST: csrrw a2, fflags, a1
+# CHECK-ALIAS: fsflags a2, a1
+fsflags x12, x11
+# CHECK-INST: csrrw zero, fflags, a3
+# CHECK-ALIAS: fsflags a3
+fsflags x13
+# CHECK-INST: csrrwi a4, fflags, 29
+# CHECK-ALIAS: fsflagsi a4, 29
+fsflagsi x14, 0x1d
+# CHECK-INST: csrrwi zero, fflags, 28
+# CHECK-ALIAS: fsflagsi 28
+fsflagsi 0x1c
+
##===----------------------------------------------------------------------===##
## Aliases which omit the rounding mode.
##===----------------------------------------------------------------------===##
diff --git a/llvm/test/Transforms/Coroutines/gh107139-split-in-scc.ll b/llvm/test/Transforms/Coroutines/gh107139-split-in-scc.ll
new file mode 100644
index 0000000..1fae4d2
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/gh107139-split-in-scc.ll
@@ -0,0 +1,38 @@
+; Verify that we don't crash on mutually recursive coroutines
+; RUN: opt < %s -passes='cgscc(coro-split)' -S | FileCheck %s
+
+target triple = "x86_64-redhat-linux-gnu"
+
+; CHECK-LABEL: define void @foo
+define void @foo() presplitcoroutine personality ptr null {
+entry:
+
+ %0 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+ %1 = call ptr @llvm.coro.begin(token %0, ptr null)
+ %2 = call token @llvm.coro.save(ptr null)
+ %3 = call i8 @llvm.coro.suspend(token none, i1 false)
+ %4 = call token @llvm.coro.save(ptr null)
+ ; CHECK: call void @bar(ptr null, ptr null)
+ call void @llvm.coro.await.suspend.void(ptr null, ptr null, ptr @bar)
+ ret void
+}
+
+; CHECK-LABEL: define void @bar({{.*}})
+define void @bar(ptr %0, ptr %1) {
+entry:
+ ; CHECK: call void @foo()
+ call void @foo()
+ ret void
+}
+
+; CHECK-LABEL: @foo.resume({{.*}})
+; CHECK-LABEL: @foo.destroy({{.*}})
+; CHECK-LABEL: @foo.cleanup({{.*}})
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #0
+declare ptr @llvm.coro.begin(token, ptr writeonly) nounwind
+declare token @llvm.coro.save(ptr) nomerge nounwind
+declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr)
+declare i8 @llvm.coro.suspend(token, i1) nounwind
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
diff --git a/llvm/test/Transforms/InstCombine/fmod.ll b/llvm/test/Transforms/InstCombine/fmod.ll
new file mode 100644
index 0000000..c021d27
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmod.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define float @test_inf_const(float %f) {
+; CHECK-LABEL: define float @test_inf_const(
+; CHECK-SAME: float [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ABS:%.*]] = tail call float @llvm.fabs.f32(float [[F]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq float [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT: br i1 [[ISINF]], label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00)
+; CHECK-NEXT: ret float [[CALL]]
+; CHECK: return:
+; CHECK-NEXT: ret float 0.000000e+00
+;
+entry:
+ %abs = tail call float @llvm.fabs.f32(float %f)
+ %isinf = fcmp oeq float %abs, 0x7FF0000000000000
+ br i1 %isinf, label %return, label %if.end
+
+if.end:
+ %call = tail call float @fmodf(float %f, float 2.0)
+ ret float %call
+
+return:
+ ret float 0.0
+}
+
+define float @test_const_zero(float %f) {
+; CHECK-LABEL: define float @test_const_zero(
+; CHECK-SAME: float [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ISZERO:%.*]] = fcmp oeq float [[F]], 0.000000e+00
+; CHECK-NEXT: br i1 [[ISZERO]], label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float 2.000000e+00, float [[F]])
+; CHECK-NEXT: ret float [[CALL]]
+; CHECK: return:
+; CHECK-NEXT: ret float 0.000000e+00
+;
+entry:
+ %iszero = fcmp oeq float %f, 0.0
+ br i1 %iszero, label %return, label %if.end
+
+if.end:
+ %call = tail call float @fmodf(float 2.0, float %f)
+ ret float %call
+
+return:
+ ret float 0.0
+}
+
+define float @test_unknown_const(float %f) {
+; CHECK-LABEL: define float @test_unknown_const(
+; CHECK-SAME: float [[F:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00)
+; CHECK-NEXT: ret float [[CALL]]
+;
+entry:
+ %call = tail call float @fmodf(float %f, float 2.000000e+00)
+ ret float %call
+}
+
+define float @test_noinf_nozero(float nofpclass(inf) %f, float nofpclass(zero) %g) {
+; CHECK-LABEL: define float @test_noinf_nozero(
+; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT: ret float [[CALL]]
+;
+entry:
+ %call = tail call nnan float @fmodf(float %f, float %g)
+ ret float %call
+}
+
+define double @test_double(double nofpclass(inf) %f, double nofpclass(zero) %g) {
+; CHECK-LABEL: define double @test_double(
+; CHECK-SAME: double nofpclass(inf) [[F:%.*]], double nofpclass(zero) [[G:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @fmod(double [[F]], double [[G]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+entry:
+ %call = tail call double @fmod(double %f, double %g)
+ ret double %call
+}
+
+define fp128 @test_fp128(fp128 nofpclass(inf) %f, fp128 nofpclass(zero) %g) {
+; CHECK-LABEL: define fp128 @test_fp128(
+; CHECK-SAME: fp128 nofpclass(inf) [[F:%.*]], fp128 nofpclass(zero) [[G:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call fp128 @fmodl(fp128 [[F]], fp128 [[G]])
+; CHECK-NEXT: ret fp128 [[CALL]]
+;
+entry:
+ %call = tail call fp128 @fmodl(fp128 %f, fp128 %g)
+ ret fp128 %call
+}
+
+define float @test_noinf_nozero_dazpreservesign(float nofpclass(inf) %f, float nofpclass(zero) %g) "denormal-fp-math"="preserve-sign,preserve-sign" {
+; CHECK-LABEL: define float @test_noinf_nozero_dazpreservesign(
+; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT: ret float [[CALL]]
+;
+entry:
+ %call = tail call nnan float @fmodf(float %f, float %g)
+ ret float %call
+}
+
+define float @test_noinf_nozero_dazdynamic(float nofpclass(inf) %f, float nofpclass(zero) %g) "denormal-fp-math"="dynamic,dynamic" {
+; CHECK-LABEL: define float @test_noinf_nozero_dazdynamic(
+; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT: ret float [[CALL]]
+;
+entry:
+ %call = tail call nnan float @fmodf(float %f, float %g)
+ ret float %call
+}
+
+declare float @fmodf(float, float)
+declare double @fmod(double, double)
+declare fp128 @fmodl(fp128, fp128)
diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll
index 44802f9..f4bbd3e 100644
--- a/llvm/test/Transforms/InstCombine/pow-1.ll
+++ b/llvm/test/Transforms/InstCombine/pow-1.ll
@@ -862,6 +862,30 @@ define double @pow_libcall_half_no_FMF(double %x) {
ret double %retval
}
+define double @pow_libcall_half_fromdomcondition(double %x) {
+; CHECK-LABEL: define double @pow_libcall_half_fromdomcondition(
+; CHECK-SAME: double [[X:%.*]]) {
+; CHECK-NEXT: [[A:%.*]] = call double @llvm.fabs.f64(double [[X]])
+; CHECK-NEXT: [[C:%.*]] = fcmp oeq double [[A]], 0x7FF0000000000000
+; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: ret double 0.000000e+00
+; CHECK: else:
+; CHECK-NEXT: [[RETVAL:%.*]] = call double @pow(double [[X]], double 5.000000e-01)
+; CHECK-NEXT: ret double [[RETVAL]]
+;
+ %a = call double @llvm.fabs.f64(double %x)
+ %c = fcmp oeq double %a, 0x7FF0000000000000
+ br i1 %c, label %then, label %else
+
+then:
+ ret double 0.0
+
+else:
+ %retval = call double @pow(double %x, double 0.5)
+ ret double %retval
+}
+
define double @pow_libcall_half_no_FMF_noerrno(double %x) {
; LIB-LABEL: define double @pow_libcall_half_no_FMF_noerrno(
; LIB-SAME: double [[X:%.*]]) {
diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll
index 9d2a947..2cf56df 100644
--- a/llvm/test/Transforms/InstCombine/rem.ll
+++ b/llvm/test/Transforms/InstCombine/rem.ll
@@ -1073,3 +1073,106 @@ define i16 @rem_pow2(i16 %x, i16 %y) {
%rem = urem i16 %x, %y
ret i16 %rem
}
+
+define i64 @rem_pow2_domcond(i64 %a, i64 %b) {
+; CHECK-LABEL: @rem_pow2_domcond(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[CPOP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[CPOP]], 1
+; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[B]], -1
+; CHECK-NEXT: [[REM:%.*]] = and i64 [[A:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i64 [[REM]]
+; CHECK: bb2:
+; CHECK-NEXT: ret i64 0
+;
+start:
+ %cpop = call i64 @llvm.ctpop.i64(i64 %b)
+ %cond = icmp eq i64 %cpop, 1
+ br i1 %cond, label %bb1, label %bb2
+
+bb1:
+ %rem = urem i64 %a, %b
+ ret i64 %rem
+
+bb2:
+ ret i64 0
+}
+
+define i64 @rem_pow2_domcond_in_else(i64 %a, i64 %b) {
+; CHECK-LABEL: @rem_pow2_domcond_in_else(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[CPOP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[COND_NOT:%.*]] = icmp eq i64 [[CPOP]], 1
+; CHECK-NEXT: br i1 [[COND_NOT]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[B]], -1
+; CHECK-NEXT: [[REM:%.*]] = and i64 [[A:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i64 [[REM]]
+; CHECK: bb2:
+; CHECK-NEXT: ret i64 0
+;
+start:
+ %cpop = call i64 @llvm.ctpop.i64(i64 %b)
+ %cond = icmp ne i64 %cpop, 1
+ br i1 %cond, label %bb2, label %bb1
+
+bb1:
+ %rem = urem i64 %a, %b
+ ret i64 %rem
+
+bb2:
+ ret i64 0
+}
+
+define i64 @rem_pow2_or_zero_domcond(i64 %a, i64 %b) {
+; CHECK-LABEL: @rem_pow2_or_zero_domcond(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[CPOP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[CPOP]], 2
+; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[B]], -1
+; CHECK-NEXT: [[REM:%.*]] = and i64 [[A:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i64 [[REM]]
+; CHECK: bb2:
+; CHECK-NEXT: ret i64 0
+;
+start:
+ %cpop = call i64 @llvm.ctpop.i64(i64 %b)
+ %cond = icmp ult i64 %cpop, 2
+ br i1 %cond, label %bb1, label %bb2
+
+bb1:
+ %rem = urem i64 %a, %b
+ ret i64 %rem
+
+bb2:
+ ret i64 0
+}
+
+define i64 @rem_pow2_non_domcond(i64 %a, i64 %b) {
+; CHECK-LABEL: @rem_pow2_non_domcond(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[CPOP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[COND_NOT:%.*]] = icmp eq i64 [[CPOP]], 1
+; CHECK-NEXT: br i1 [[COND_NOT]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A:%.*]], [[B]]
+; CHECK-NEXT: ret i64 [[REM]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB1]]
+;
+start:
+ %cpop = call i64 @llvm.ctpop.i64(i64 %b)
+ %cond = icmp ne i64 %cpop, 1
+ br i1 %cond, label %bb2, label %bb1
+
+bb1:
+ %rem = urem i64 %a, %b
+ ret i64 %rem
+
+bb2:
+ br label %bb1
+}
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 0c9ab24..69b8519 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -221,8 +221,8 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]>
-; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1>
; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-from-end vp<[[RED_RES]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1>
; CHECK-NEXT: EMIT branch-on-cond ir<true>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
new file mode 100644
index 0000000..edc0381
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll
@@ -0,0 +1,865 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux \
+; RUN: -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,NOFP16
+; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux \
+; RUN: -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,FP16
+
+define half @reduce_fast_half2(<2 x half> %vec2) {
+; CHECK-LABEL: define half @reduce_fast_half2(
+; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
+; CHECK-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: ret half [[ADD1]]
+;
+entry:
+ %elt0 = extractelement <2 x half> %vec2, i64 0
+ %elt1 = extractelement <2 x half> %vec2, i64 1
+ %add1 = fadd fast half %elt1, %elt0
+ ret half %add1
+}
+
+define half @reduce_half2(<2 x half> %vec2) {
+; CHECK-LABEL: define half @reduce_half2(
+; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
+; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: ret half [[ADD1]]
+;
+entry:
+ %elt0 = extractelement <2 x half> %vec2, i64 0
+ %elt1 = extractelement <2 x half> %vec2, i64 1
+ %add1 = fadd half %elt1, %elt0
+ ret half %add1
+}
+
+define half @reduce_fast_half4(<4 x half> %vec4) {
+; CHECK-LABEL: define half @reduce_fast_half4(
+; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
+; CHECK-NEXT: ret half [[TMP0]]
+;
+entry:
+ %elt0 = extractelement <4 x half> %vec4, i64 0
+ %elt1 = extractelement <4 x half> %vec4, i64 1
+ %elt2 = extractelement <4 x half> %vec4, i64 2
+ %elt3 = extractelement <4 x half> %vec4, i64 3
+ %add1 = fadd fast half %elt1, %elt0
+ %add2 = fadd fast half %elt2, %add1
+ %add3 = fadd fast half %elt3, %add2
+ ret half %add3
+}
+
+define half @reduce_half4(<4 x half> %vec4) {
+; CHECK-LABEL: define half @reduce_half4(
+; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
+; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
+; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
+; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
+; CHECK-NEXT: ret half [[ADD3]]
+;
+entry:
+ %elt0 = extractelement <4 x half> %vec4, i64 0
+ %elt1 = extractelement <4 x half> %vec4, i64 1
+ %elt2 = extractelement <4 x half> %vec4, i64 2
+ %elt3 = extractelement <4 x half> %vec4, i64 3
+ %add1 = fadd half %elt1, %elt0
+ %add2 = fadd half %elt2, %add1
+ %add3 = fadd half %elt3, %add2
+ ret half %add3
+}
+
+define half @reduce_fast_half8(<8 x half> %vec8) {
+; CHECK-LABEL: define half @reduce_fast_half8(
+; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
+; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
+; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
+; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[ELT4]]
+; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
+; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[OP_RDX]], [[OP_RDX1]]
+; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[OP_RDX2]], [[ELT7]]
+; CHECK-NEXT: ret half [[OP_RDX3]]
+;
+entry:
+ %elt0 = extractelement <8 x half> %vec8, i64 0
+ %elt1 = extractelement <8 x half> %vec8, i64 1
+ %elt2 = extractelement <8 x half> %vec8, i64 2
+ %elt3 = extractelement <8 x half> %vec8, i64 3
+ %elt4 = extractelement <8 x half> %vec8, i64 4
+ %elt5 = extractelement <8 x half> %vec8, i64 5
+ %elt6 = extractelement <8 x half> %vec8, i64 6
+ %elt7 = extractelement <8 x half> %vec8, i64 7
+ %add1 = fadd fast half %elt1, %elt0
+ %add2 = fadd fast half %elt2, %add1
+ %add3 = fadd fast half %elt3, %add2
+ %add4 = fadd fast half %elt4, %add3
+ %add5 = fadd fast half %elt5, %add4
+ %add6 = fadd fast half %elt6, %add5
+ %add7 = fadd fast half %elt7, %add6
+ ret half %add7
+}
+
+define half @reduce_half8(<8 x half> %vec8) {
+; CHECK-LABEL: define half @reduce_half8(
+; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <8 x half> [[VEC8]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1
+; CHECK-NEXT: [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2
+; CHECK-NEXT: [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3
+; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
+; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
+; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
+; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
+; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
+; CHECK-NEXT: [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
+; CHECK-NEXT: [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
+; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
+; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
+; CHECK-NEXT: ret half [[ADD7]]
+;
+entry:
+ %elt0 = extractelement <8 x half> %vec8, i64 0
+ %elt1 = extractelement <8 x half> %vec8, i64 1
+ %elt2 = extractelement <8 x half> %vec8, i64 2
+ %elt3 = extractelement <8 x half> %vec8, i64 3
+ %elt4 = extractelement <8 x half> %vec8, i64 4
+ %elt5 = extractelement <8 x half> %vec8, i64 5
+ %elt6 = extractelement <8 x half> %vec8, i64 6
+ %elt7 = extractelement <8 x half> %vec8, i64 7
+ %add1 = fadd half %elt1, %elt0
+ %add2 = fadd half %elt2, %add1
+ %add3 = fadd half %elt3, %add2
+ %add4 = fadd half %elt4, %add3
+ %add5 = fadd half %elt5, %add4
+ %add6 = fadd half %elt6, %add5
+ %add7 = fadd half %elt7, %add6
+ ret half %add7
+}
+
+define half @reduce_fast_half16(<16 x half> %vec16) {
+; NOFP16-LABEL: define half @reduce_fast_half16(
+; NOFP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
+; NOFP16-NEXT: [[ENTRY:.*:]]
+; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[VEC16]])
+; NOFP16-NEXT: ret half [[TMP0]]
+;
+; FP16-LABEL: define half @reduce_fast_half16(
+; FP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
+; FP16-NEXT: [[ENTRY:.*:]]
+; FP16-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4
+; FP16-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5
+; FP16-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6
+; FP16-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7
+; FP16-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
+; FP16-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
+; FP16-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
+; FP16-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
+; FP16-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]])
+; FP16-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; FP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP2]])
+; FP16-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
+; FP16-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[OP_RDX]], [[ELT4]]
+; FP16-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
+; FP16-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[ELT7]], [[ELT12]]
+; FP16-NEXT: [[OP_RDX4:%.*]] = fadd fast half [[ELT13]], [[ELT14]]
+; FP16-NEXT: [[OP_RDX5:%.*]] = fadd fast half [[OP_RDX1]], [[OP_RDX2]]
+; FP16-NEXT: [[OP_RDX6:%.*]] = fadd fast half [[OP_RDX3]], [[OP_RDX4]]
+; FP16-NEXT: [[OP_RDX7:%.*]] = fadd fast half [[OP_RDX5]], [[OP_RDX6]]
+; FP16-NEXT: [[OP_RDX8:%.*]] = fadd fast half [[OP_RDX7]], [[ELT15]]
+; FP16-NEXT: ret half [[OP_RDX8]]
+;
+entry:
+ %elt0 = extractelement <16 x half> %vec16, i64 0
+ %elt1 = extractelement <16 x half> %vec16, i64 1
+ %elt2 = extractelement <16 x half> %vec16, i64 2
+ %elt3 = extractelement <16 x half> %vec16, i64 3
+ %elt4 = extractelement <16 x half> %vec16, i64 4
+ %elt5 = extractelement <16 x half> %vec16, i64 5
+ %elt6 = extractelement <16 x half> %vec16, i64 6
+ %elt7 = extractelement <16 x half> %vec16, i64 7
+ %elt8 = extractelement <16 x half> %vec16, i64 8
+ %elt9 = extractelement <16 x half> %vec16, i64 9
+ %elt10 = extractelement <16 x half> %vec16, i64 10
+ %elt11 = extractelement <16 x half> %vec16, i64 11
+ %elt12 = extractelement <16 x half> %vec16, i64 12
+ %elt13 = extractelement <16 x half> %vec16, i64 13
+ %elt14 = extractelement <16 x half> %vec16, i64 14
+ %elt15 = extractelement <16 x half> %vec16, i64 15
+ %add1 = fadd fast half %elt1, %elt0
+ %add2 = fadd fast half %elt2, %add1
+ %add3 = fadd fast half %elt3, %add2
+ %add4 = fadd fast half %elt4, %add3
+ %add5 = fadd fast half %elt5, %add4
+ %add6 = fadd fast half %elt6, %add5
+ %add7 = fadd fast half %elt7, %add6
+ %add8 = fadd fast half %elt8, %add7
+ %add9 = fadd fast half %elt9, %add8
+ %add10 = fadd fast half %elt10, %add9
+ %add11 = fadd fast half %elt11, %add10
+ %add12 = fadd fast half %elt12, %add11
+ %add13 = fadd fast half %elt13, %add12
+ %add14 = fadd fast half %elt14, %add13
+ %add15 = fadd fast half %elt15, %add14
+ ret half %add15
+}
+
+define half @reduce_half16(<16 x half> %vec16) {
+; CHECK-LABEL: define half @reduce_half16(
+; CHECK-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <16 x half> [[VEC16]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1
+; CHECK-NEXT: [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2
+; CHECK-NEXT: [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3
+; CHECK-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4
+; CHECK-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5
+; CHECK-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6
+; CHECK-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7
+; CHECK-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8
+; CHECK-NEXT: [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9
+; CHECK-NEXT: [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10
+; CHECK-NEXT: [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11
+; CHECK-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
+; CHECK-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
+; CHECK-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
+; CHECK-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
+; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
+; CHECK-NEXT: [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
+; CHECK-NEXT: [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
+; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
+; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
+; CHECK-NEXT: [[ADD8:%.*]] = fadd half [[ELT8]], [[ADD7]]
+; CHECK-NEXT: [[ADD9:%.*]] = fadd half [[ELT9]], [[ADD8]]
+; CHECK-NEXT: [[ADD10:%.*]] = fadd half [[ELT10]], [[ADD9]]
+; CHECK-NEXT: [[ADD11:%.*]] = fadd half [[ELT11]], [[ADD10]]
+; CHECK-NEXT: [[ADD12:%.*]] = fadd half [[ELT12]], [[ADD11]]
+; CHECK-NEXT: [[ADD13:%.*]] = fadd half [[ELT13]], [[ADD12]]
+; CHECK-NEXT: [[ADD14:%.*]] = fadd half [[ELT14]], [[ADD13]]
+; CHECK-NEXT: [[ADD15:%.*]] = fadd half [[ELT15]], [[ADD14]]
+; CHECK-NEXT: ret half [[ADD15]]
+;
+entry:
+ %elt0 = extractelement <16 x half> %vec16, i64 0
+ %elt1 = extractelement <16 x half> %vec16, i64 1
+ %elt2 = extractelement <16 x half> %vec16, i64 2
+ %elt3 = extractelement <16 x half> %vec16, i64 3
+ %elt4 = extractelement <16 x half> %vec16, i64 4
+ %elt5 = extractelement <16 x half> %vec16, i64 5
+ %elt6 = extractelement <16 x half> %vec16, i64 6
+ %elt7 = extractelement <16 x half> %vec16, i64 7
+ %elt8 = extractelement <16 x half> %vec16, i64 8
+ %elt9 = extractelement <16 x half> %vec16, i64 9
+ %elt10 = extractelement <16 x half> %vec16, i64 10
+ %elt11 = extractelement <16 x half> %vec16, i64 11
+ %elt12 = extractelement <16 x half> %vec16, i64 12
+ %elt13 = extractelement <16 x half> %vec16, i64 13
+ %elt14 = extractelement <16 x half> %vec16, i64 14
+ %elt15 = extractelement <16 x half> %vec16, i64 15
+ %add1 = fadd half %elt1, %elt0
+ %add2 = fadd half %elt2, %add1
+ %add3 = fadd half %elt3, %add2
+ %add4 = fadd half %elt4, %add3
+ %add5 = fadd half %elt5, %add4
+ %add6 = fadd half %elt6, %add5
+ %add7 = fadd half %elt7, %add6
+ %add8 = fadd half %elt8, %add7
+ %add9 = fadd half %elt9, %add8
+ %add10 = fadd half %elt10, %add9
+ %add11 = fadd half %elt11, %add10
+ %add12 = fadd half %elt12, %add11
+ %add13 = fadd half %elt13, %add12
+ %add14 = fadd half %elt14, %add13
+ %add15 = fadd half %elt15, %add14
+ ret half %add15
+}
+
+define float @reduce_fast_float2(<2 x float> %vec2) {
+; CHECK-LABEL: define float @reduce_fast_float2(
+; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
+; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
+; CHECK-NEXT: ret float [[ADD1]]
+;
+entry:
+ %elt0 = extractelement <2 x float> %vec2, i64 0
+ %elt1 = extractelement <2 x float> %vec2, i64 1
+ %add1 = fadd fast float %elt1, %elt0
+ ret float %add1
+}
+
+define float @reduce_float2(<2 x float> %vec2) {
+; CHECK-LABEL: define float @reduce_float2(
+; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
+; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
+; CHECK-NEXT: ret float [[ADD1]]
+;
+entry:
+ %elt0 = extractelement <2 x float> %vec2, i64 0
+ %elt1 = extractelement <2 x float> %vec2, i64 1
+ %add1 = fadd float %elt1, %elt0
+ ret float %add1
+}
+
+define float @reduce_fast_float4(<4 x float> %vec4) {
+; CHECK-LABEL: define float @reduce_fast_float4(
+; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC4]])
+; CHECK-NEXT: ret float [[TMP0]]
+;
+entry:
+ %elt0 = extractelement <4 x float> %vec4, i64 0
+ %elt1 = extractelement <4 x float> %vec4, i64 1
+ %elt2 = extractelement <4 x float> %vec4, i64 2
+ %elt3 = extractelement <4 x float> %vec4, i64 3
+ %add1 = fadd fast float %elt1, %elt0
+ %add2 = fadd fast float %elt2, %add1
+ %add3 = fadd fast float %elt3, %add2
+ ret float %add3
+}
+
+define float @reduce_float4(<4 x float> %vec4) {
+; CHECK-LABEL: define float @reduce_float4(
+; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[VEC4]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[VEC4]], i64 1
+; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x float> [[VEC4]], i64 2
+; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x float> [[VEC4]], i64 3
+; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
+; CHECK-NEXT: ret float [[ADD3]]
+;
+entry:
+ %elt0 = extractelement <4 x float> %vec4, i64 0
+ %elt1 = extractelement <4 x float> %vec4, i64 1
+ %elt2 = extractelement <4 x float> %vec4, i64 2
+ %elt3 = extractelement <4 x float> %vec4, i64 3
+ %add1 = fadd float %elt1, %elt0
+ %add2 = fadd float %elt2, %add1
+ %add3 = fadd float %elt3, %add2
+ ret float %add3
+}
+
+define float @reduce_fast_float8(<8 x float> %vec8) {
+; CHECK-LABEL: define float @reduce_fast_float8(
+; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[VEC8]])
+; CHECK-NEXT: ret float [[TMP0]]
+;
+entry:
+ %elt0 = extractelement <8 x float> %vec8, i64 0
+ %elt1 = extractelement <8 x float> %vec8, i64 1
+ %elt2 = extractelement <8 x float> %vec8, i64 2
+ %elt3 = extractelement <8 x float> %vec8, i64 3
+ %elt4 = extractelement <8 x float> %vec8, i64 4
+ %elt5 = extractelement <8 x float> %vec8, i64 5
+ %elt6 = extractelement <8 x float> %vec8, i64 6
+ %elt7 = extractelement <8 x float> %vec8, i64 7
+ %add1 = fadd fast float %elt1, %elt0
+ %add2 = fadd fast float %elt2, %add1
+ %add3 = fadd fast float %elt3, %add2
+ %add4 = fadd fast float %elt4, %add3
+ %add5 = fadd fast float %elt5, %add4
+ %add6 = fadd fast float %elt6, %add5
+ %add7 = fadd fast float %elt7, %add6
+ ret float %add7
+}
+
+define float @reduce_float8(<8 x float> %vec8) {
+; CHECK-LABEL: define float @reduce_float8(
+; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <8 x float> [[VEC8]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <8 x float> [[VEC8]], i64 1
+; CHECK-NEXT: [[ELT2:%.*]] = extractelement <8 x float> [[VEC8]], i64 2
+; CHECK-NEXT: [[ELT3:%.*]] = extractelement <8 x float> [[VEC8]], i64 3
+; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x float> [[VEC8]], i64 4
+; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x float> [[VEC8]], i64 5
+; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x float> [[VEC8]], i64 6
+; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x float> [[VEC8]], i64 7
+; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
+; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[ELT4]], [[ADD3]]
+; CHECK-NEXT: [[ADD5:%.*]] = fadd float [[ELT5]], [[ADD4]]
+; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ELT6]], [[ADD5]]
+; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ELT7]], [[ADD6]]
+; CHECK-NEXT: ret float [[ADD7]]
+;
+entry:
+ %elt0 = extractelement <8 x float> %vec8, i64 0
+ %elt1 = extractelement <8 x float> %vec8, i64 1
+ %elt2 = extractelement <8 x float> %vec8, i64 2
+ %elt3 = extractelement <8 x float> %vec8, i64 3
+ %elt4 = extractelement <8 x float> %vec8, i64 4
+ %elt5 = extractelement <8 x float> %vec8, i64 5
+ %elt6 = extractelement <8 x float> %vec8, i64 6
+ %elt7 = extractelement <8 x float> %vec8, i64 7
+ %add1 = fadd float %elt1, %elt0
+ %add2 = fadd float %elt2, %add1
+ %add3 = fadd float %elt3, %add2
+ %add4 = fadd float %elt4, %add3
+ %add5 = fadd float %elt5, %add4
+ %add6 = fadd float %elt6, %add5
+ %add7 = fadd float %elt7, %add6
+ ret float %add7
+}
+
+define double @reduce_fast_double2(<2 x double> %vec2) {
+; CHECK-LABEL: define double @reduce_fast_double2(
+; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
+; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]]
+; CHECK-NEXT: ret double [[ADD1]]
+;
+entry:
+ %elt0 = extractelement <2 x double> %vec2, i64 0
+ %elt1 = extractelement <2 x double> %vec2, i64 1
+ %add1 = fadd fast double %elt1, %elt0
+ ret double %add1
+}
+
+define double @reduce_double2(<2 x double> %vec2) {
+; CHECK-LABEL: define double @reduce_double2(
+; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
+; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
+; CHECK-NEXT: ret double [[ADD1]]
+;
+entry:
+ %elt0 = extractelement <2 x double> %vec2, i64 0
+ %elt1 = extractelement <2 x double> %vec2, i64 1
+ %add1 = fadd double %elt1, %elt0
+ ret double %add1
+}
+
+define double @reduce_fast_double4(<4 x double> %vec4) {
+; CHECK-LABEL: define double @reduce_fast_double4(
+; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VEC4]])
+; CHECK-NEXT: ret double [[TMP0]]
+;
+entry:
+ %elt0 = extractelement <4 x double> %vec4, i64 0
+ %elt1 = extractelement <4 x double> %vec4, i64 1
+ %elt2 = extractelement <4 x double> %vec4, i64 2
+ %elt3 = extractelement <4 x double> %vec4, i64 3
+ %add1 = fadd fast double %elt1, %elt0
+ %add2 = fadd fast double %elt2, %add1
+ %add3 = fadd fast double %elt3, %add2
+ ret double %add3
+}
+
+define double @reduce_double4(<4 x double> %vec4) {
+; CHECK-LABEL: define double @reduce_double4(
+; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x double> [[VEC4]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x double> [[VEC4]], i64 1
+; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x double> [[VEC4]], i64 2
+; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x double> [[VEC4]], i64 3
+; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[ELT2]], [[ADD1]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[ELT3]], [[ADD2]]
+; CHECK-NEXT: ret double [[ADD3]]
+;
+entry:
+ %elt0 = extractelement <4 x double> %vec4, i64 0
+ %elt1 = extractelement <4 x double> %vec4, i64 1
+ %elt2 = extractelement <4 x double> %vec4, i64 2
+ %elt3 = extractelement <4 x double> %vec4, i64 3
+ %add1 = fadd double %elt1, %elt0
+ %add2 = fadd double %elt2, %add1
+ %add3 = fadd double %elt3, %add2
+ ret double %add3
+}
+
+; Fixed iteration count. sum += a[i]
+define float @reduce_fast_float_case1(ptr %a) {
+; CHECK-LABEL: define float @reduce_fast_float_case1(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP]], align 4
+; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[LOAD1]], [[LOAD]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[LOAD2]], [[ADD1]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12
+; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
+; CHECK-NEXT: [[ADD3:%.*]] = fadd fast float [[LOAD3]], [[ADD2]]
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
+; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[LOAD4]], [[ADD3]]
+; CHECK-NEXT: ret float [[ADD4]]
+;
+entry:
+ %load = load float, ptr %a
+ %gep = getelementptr inbounds i8, ptr %a, i64 4
+ %load1 = load float, ptr %gep
+ %add1 = fadd fast float %load1, %load
+ %gep2 = getelementptr inbounds i8, ptr %a, i64 8
+ %load2 = load float, ptr %gep2
+ %add2 = fadd fast float %load2, %add1
+ %gep3 = getelementptr inbounds i8, ptr %a, i64 12
+ %load3 = load float, ptr %gep3
+ %add3 = fadd fast float %load3, %add2
+ %gep4 = getelementptr inbounds i8, ptr %a, i64 16
+ %load4 = load float, ptr %gep4
+ %add4 = fadd fast float %load4, %add3
+ ret float %add4
+}
+
+; Fixed iteration count. sum += a[i]
+define float @reduce_float_case1(ptr %a) {
+; CHECK-LABEL: define float @reduce_float_case1(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP]], align 4
+; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOAD1]], [[LOAD]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[LOAD2]], [[ADD1]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12
+; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
+; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[LOAD3]], [[ADD2]]
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
+; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[LOAD4]], [[ADD3]]
+; CHECK-NEXT: ret float [[ADD4]]
+;
+entry:
+ %load = load float, ptr %a
+ %gep = getelementptr inbounds i8, ptr %a, i64 4
+ %load1 = load float, ptr %gep
+ %add1 = fadd float %load1, %load
+ %gep2 = getelementptr inbounds i8, ptr %a, i64 8
+ %load2 = load float, ptr %gep2
+ %add2 = fadd float %load2, %add1
+ %gep3 = getelementptr inbounds i8, ptr %a, i64 12
+ %load3 = load float, ptr %gep3
+ %add3 = fadd float %load3, %add2
+ %gep4 = getelementptr inbounds i8, ptr %a, i64 16
+ %load4 = load float, ptr %gep4
+ %add4 = fadd float %load4, %add3
+ ret float %add4
+}
+
+; Reduction needs a shuffle. See add2 and add3.
+define float @reduce_fast_float_case2(ptr %a, ptr %b) {
+; CHECK-LABEL: define float @reduce_fast_float_case2(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[GEPA2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
+; CHECK-NEXT: [[GEPA3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
+; CHECK-NEXT: [[GEPB2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
+; CHECK-NEXT: [[GEPB3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3
+; CHECK-NEXT: [[LOADA2:%.*]] = load float, ptr [[GEPA2]], align 4
+; CHECK-NEXT: [[LOADA3:%.*]] = load float, ptr [[GEPA3]], align 4
+; CHECK-NEXT: [[LOADB2:%.*]] = load float, ptr [[GEPB2]], align 4
+; CHECK-NEXT: [[LOADB3:%.*]] = load float, ptr [[GEPB3]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[B]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x float> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[LOADA3]], [[LOADB2]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd fast float [[LOADA2]], [[LOADB3]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[RED1:%.*]] = fadd fast float [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[RED2:%.*]] = fadd fast float [[ADD2]], [[RED1]]
+; CHECK-NEXT: [[RED3:%.*]] = fadd fast float [[ADD3]], [[RED2]]
+; CHECK-NEXT: ret float [[RED3]]
+;
+entry:
+ %gepa1 = getelementptr inbounds float, ptr %a, i32 1
+ %gepa2 = getelementptr inbounds float, ptr %a, i32 2
+ %gepa3 = getelementptr inbounds float, ptr %a, i32 3
+ %gepb1 = getelementptr inbounds float, ptr %b, i32 1
+ %gepb2 = getelementptr inbounds float, ptr %b, i32 2
+ %gepb3 = getelementptr inbounds float, ptr %b, i32 3
+ %loada = load float, ptr %a
+ %loada1 = load float, ptr %gepa1
+ %loada2 = load float, ptr %gepa2
+ %loada3 = load float, ptr %gepa3
+ %loadb = load float, ptr %b
+ %loadb1 = load float, ptr %gepb1
+ %loadb2 = load float, ptr %gepb2
+ %loadb3 = load float, ptr %gepb3
+ %add = fadd fast float %loada, %loadb
+ %add1 = fadd fast float %loada1, %loadb1
+ %add2 = fadd fast float %loada3, %loadb2
+ %add3 = fadd fast float %loada2, %loadb3
+ %red1 = fadd fast float %add, %add1
+ %red2 = fadd fast float %add2, %red1
+ %red3 = fadd fast float %add3, %red2
+ ret float %red3
+}
+
+; Reduction needs a shuffle. See add2 and add3.
+define float @reduce_float_case2(ptr %a, ptr %b) {
+; CHECK-LABEL: define float @reduce_float_case2(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[GEPA2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
+; CHECK-NEXT: [[GEPA3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
+; CHECK-NEXT: [[GEPB2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
+; CHECK-NEXT: [[GEPB3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3
+; CHECK-NEXT: [[LOADA2:%.*]] = load float, ptr [[GEPA2]], align 4
+; CHECK-NEXT: [[LOADA3:%.*]] = load float, ptr [[GEPA3]], align 4
+; CHECK-NEXT: [[LOADB2:%.*]] = load float, ptr [[GEPB2]], align 4
+; CHECK-NEXT: [[LOADB3:%.*]] = load float, ptr [[GEPB3]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[B]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[LOADA3]], [[LOADB2]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[LOADA2]], [[LOADB3]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[RED1:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[RED2:%.*]] = fadd float [[ADD2]], [[RED1]]
+; CHECK-NEXT: [[RED3:%.*]] = fadd float [[ADD3]], [[RED2]]
+; CHECK-NEXT: ret float [[RED3]]
+;
+entry:
+ %gepa1 = getelementptr inbounds float, ptr %a, i32 1
+ %gepa2 = getelementptr inbounds float, ptr %a, i32 2
+ %gepa3 = getelementptr inbounds float, ptr %a, i32 3
+ %gepb1 = getelementptr inbounds float, ptr %b, i32 1
+ %gepb2 = getelementptr inbounds float, ptr %b, i32 2
+ %gepb3 = getelementptr inbounds float, ptr %b, i32 3
+ %loada = load float, ptr %a
+ %loada1 = load float, ptr %gepa1
+ %loada2 = load float, ptr %gepa2
+ %loada3 = load float, ptr %gepa3
+ %loadb = load float, ptr %b
+ %loadb1 = load float, ptr %gepb1
+ %loadb2 = load float, ptr %gepb2
+ %loadb3 = load float, ptr %gepb3
+ %add = fadd float %loada, %loadb
+ %add1 = fadd float %loada1, %loadb1
+ %add2 = fadd float %loada3, %loadb2
+ %add3 = fadd float %loada2, %loadb3
+ %red1 = fadd float %add, %add1
+ %red2 = fadd float %add2, %red1
+ %red3 = fadd float %add3, %red2
+ ret float %red3
+}
+
+; Addition of log() results.
+define float @reduce_fast_float_case3(ptr %a) {
+; CHECK-LABEL: define float @reduce_fast_float_case3(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4
+; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
+; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
+; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
+; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
+; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
+; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
+; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
+; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
+; CHECK-NEXT: [[LOG:%.*]] = call fast float @llvm.log.f32(float [[LOAD]])
+; CHECK-NEXT: [[LOG1:%.*]] = call fast float @llvm.log.f32(float [[LOAD1]])
+; CHECK-NEXT: [[LOG2:%.*]] = call fast float @llvm.log.f32(float [[LOAD2]])
+; CHECK-NEXT: [[LOG3:%.*]] = call fast float @llvm.log.f32(float [[LOAD3]])
+; CHECK-NEXT: [[LOG4:%.*]] = call fast float @llvm.log.f32(float [[LOAD4]])
+; CHECK-NEXT: [[LOG5:%.*]] = call fast float @llvm.log.f32(float [[LOAD5]])
+; CHECK-NEXT: [[LOG6:%.*]] = call fast float @llvm.log.f32(float [[LOAD6]])
+; CHECK-NEXT: [[LOG7:%.*]] = call fast float @llvm.log.f32(float [[LOAD7]])
+; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[LOG]], [[LOG1]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[ADD1]], [[LOG2]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd fast float [[ADD2]], [[LOG3]]
+; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[ADD3]], [[LOG4]]
+; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[LOG5]]
+; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float [[ADD5]], [[LOG6]]
+; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD6]], [[LOG7]]
+; CHECK-NEXT: ret float [[ADD7]]
+;
+entry:
+ %gep1 = getelementptr inbounds float, ptr %a, i32 1
+ %gep2 = getelementptr inbounds float, ptr %a, i32 2
+ %gep3 = getelementptr inbounds float, ptr %a, i32 3
+ %gep4 = getelementptr inbounds float, ptr %a, i32 4
+ %gep5 = getelementptr inbounds float, ptr %a, i32 5
+ %gep6 = getelementptr inbounds float, ptr %a, i32 6
+ %gep7 = getelementptr inbounds float, ptr %a, i32 7
+ %load = load float, ptr %a
+ %load1 = load float, ptr %gep1
+ %load2 = load float, ptr %gep2
+ %load3 = load float, ptr %gep3
+ %load4 = load float, ptr %gep4
+ %load5 = load float, ptr %gep5
+ %load6 = load float, ptr %gep6
+ %load7 = load float, ptr %gep7
+ %log = call fast float @llvm.log.f32(float %load)
+ %log1 = call fast float @llvm.log.f32(float %load1)
+ %log2 = call fast float @llvm.log.f32(float %load2)
+ %log3 = call fast float @llvm.log.f32(float %load3)
+ %log4 = call fast float @llvm.log.f32(float %load4)
+ %log5 = call fast float @llvm.log.f32(float %load5)
+ %log6 = call fast float @llvm.log.f32(float %load6)
+ %log7 = call fast float @llvm.log.f32(float %load7)
+ %add1 = fadd fast float %log, %log1
+ %add2 = fadd fast float %add1, %log2
+ %add3 = fadd fast float %add2, %log3
+ %add4 = fadd fast float %add3, %log4
+ %add5 = fadd fast float %add4, %log5
+ %add6 = fadd fast float %add5, %log6
+ %add7 = fadd fast float %add6, %log7
+ ret float %add7
+}
+
+; Addition of log() results.
+define float @reduce_float_case3(ptr %a) {
+; CHECK-LABEL: define float @reduce_float_case3(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4
+; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
+; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
+; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
+; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
+; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
+; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
+; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
+; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
+; CHECK-NEXT: [[LOG:%.*]] = call float @llvm.log.f32(float [[LOAD]])
+; CHECK-NEXT: [[LOG1:%.*]] = call float @llvm.log.f32(float [[LOAD1]])
+; CHECK-NEXT: [[LOG2:%.*]] = call float @llvm.log.f32(float [[LOAD2]])
+; CHECK-NEXT: [[LOG3:%.*]] = call float @llvm.log.f32(float [[LOAD3]])
+; CHECK-NEXT: [[LOG4:%.*]] = call float @llvm.log.f32(float [[LOAD4]])
+; CHECK-NEXT: [[LOG5:%.*]] = call float @llvm.log.f32(float [[LOAD5]])
+; CHECK-NEXT: [[LOG6:%.*]] = call float @llvm.log.f32(float [[LOAD6]])
+; CHECK-NEXT: [[LOG7:%.*]] = call float @llvm.log.f32(float [[LOAD7]])
+; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOG]], [[LOG1]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ADD1]], [[LOG2]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ADD2]], [[LOG3]]
+; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[ADD3]], [[LOG4]]
+; CHECK-NEXT: [[ADD5:%.*]] = fadd float [[ADD4]], [[LOG5]]
+; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
+; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
+; CHECK-NEXT: ret float [[ADD7]]
+;
+entry:
+ %gep1 = getelementptr inbounds float, ptr %a, i32 1
+ %gep2 = getelementptr inbounds float, ptr %a, i32 2
+ %gep3 = getelementptr inbounds float, ptr %a, i32 3
+ %gep4 = getelementptr inbounds float, ptr %a, i32 4
+ %gep5 = getelementptr inbounds float, ptr %a, i32 5
+ %gep6 = getelementptr inbounds float, ptr %a, i32 6
+ %gep7 = getelementptr inbounds float, ptr %a, i32 7
+ %load = load float, ptr %a
+ %load1 = load float, ptr %gep1
+ %load2 = load float, ptr %gep2
+ %load3 = load float, ptr %gep3
+ %load4 = load float, ptr %gep4
+ %load5 = load float, ptr %gep5
+ %load6 = load float, ptr %gep6
+ %load7 = load float, ptr %gep7
+ %log = call float @llvm.log.f32(float %load)
+ %log1 = call float @llvm.log.f32(float %load1)
+ %log2 = call float @llvm.log.f32(float %load2)
+ %log3 = call float @llvm.log.f32(float %load3)
+ %log4 = call float @llvm.log.f32(float %load4)
+ %log5 = call float @llvm.log.f32(float %load5)
+ %log6 = call float @llvm.log.f32(float %load6)
+ %log7 = call float @llvm.log.f32(float %load7)
+ %add1 = fadd float %log, %log1
+ %add2 = fadd float %add1, %log2
+ %add3 = fadd float %add2, %log3
+ %add4 = fadd float %add3, %log4
+ %add5 = fadd float %add4, %log5
+ %add6 = fadd float %add5, %log6
+ %add7 = fadd float %add6, %log7
+ ret float %add7
+}
+
+define half @reduce_unordered_fast_half4(<4 x half> %vec4) {
+; CHECK-LABEL: define half @reduce_unordered_fast_half4(
+; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
+; CHECK-NEXT: ret half [[TMP0]]
+;
+entry:
+ %elt0 = extractelement <4 x half> %vec4, i64 0
+ %elt1 = extractelement <4 x half> %vec4, i64 1
+ %elt2 = extractelement <4 x half> %vec4, i64 2
+ %elt3 = extractelement <4 x half> %vec4, i64 3
+ %add1 = fadd fast half %elt1, %elt0
+ %add2 = fadd fast half %elt2, %elt3
+ %add3 = fadd fast half %add1, %add2
+ ret half %add3
+}
+
+define half @reduce_unordered_half4(<4 x half> %vec4) {
+; CHECK-LABEL: define half @reduce_unordered_half4(
+; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x half> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x half> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x half> [[TMP2]], i32 1
+; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret half [[ADD3]]
+;
+entry:
+ %elt0 = extractelement <4 x half> %vec4, i64 0
+ %elt1 = extractelement <4 x half> %vec4, i64 1
+ %elt2 = extractelement <4 x half> %vec4, i64 2
+ %elt3 = extractelement <4 x half> %vec4, i64 3
+ %add1 = fadd half %elt1, %elt0
+ %add2 = fadd half %elt2, %elt3
+ %add3 = fadd half %add1, %add2
+ ret half %add3
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
new file mode 100644
index 0000000..e6a166c2
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=cascadelake < %s | FileCheck %s
+
+define i32 @foo() {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[D:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %d = load i32, ptr null, align 4
+ %0 = extractelement <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i32 0
+ %1 = extractelement <8 x i32> zeroinitializer, i32 0
+ %2 = or i32 0, %d
+ %3 = or i32 0, %d
+ %4 = or i32 0, %d
+ %5 = add i32 0, 0
+ %6 = or i32 0, %0
+ %7 = or i32 0, %d
+ %8 = or i32 0, %d
+ %9 = or i32 0, %1
+ store i32 %2, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 22), align 8
+ store i32 %3, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 21), align 4
+ store i32 %4, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 20), align 16
+ store i32 %5, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 19), align 4
+ store i32 %6, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 18), align 8
+ store i32 %7, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 17), align 4
+ store i32 %8, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 16), align 16
+ store i32 %9, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
+ ret i32 0
+}
diff --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s
index 333956d..3987671 100644
--- a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s
+++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s
@@ -4,7 +4,7 @@
# RUN: -split-dwarf-file=%t.dwo -dwarf-version=4
# RUN: llvm-dwp %t.dwo -o %t.dwp
# RUN: llvm-dwarfdump -debug-info -debug-types -debug-cu-index -debug-tu-index %t.dwp | FileCheck %s
-# RUN: llvm-dwarfdump -debug-info -debug-types -debug-cu-index -debug-tu-index -manaully-generate-unit-index %t.dwp | FileCheck %s
+# RUN: llvm-dwarfdump -debug-info -debug-types -debug-cu-index -debug-tu-index -manually-generate-unit-index %t.dwp | FileCheck %s
## Note: In order to check whether the type unit index is generated,
## there is no need to add the missing DIEs for the structure type of the type unit.
diff --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s
index fde41f3..4dee886 100644
--- a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s
+++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s
@@ -4,7 +4,7 @@
# RUN: -split-dwarf-file=%t.dwo -dwarf-version=5
# RUN: llvm-dwp %t.dwo -o %t.dwp
# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index %t.dwp | FileCheck %s
-# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index -manaully-generate-unit-index %t.dwp | FileCheck %s
+# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index -manually-generate-unit-index %t.dwp | FileCheck %s
## Note: In order to check whether the type unit index is generated,
## there is no need to add the missing DIEs for the structure type of the type unit.
diff --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s
index 1f63b21..b131774 100644
--- a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s
+++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s
@@ -5,7 +5,7 @@
# RUN: -split-dwarf-file=%t.dwo -dwarf-version=5
# RUN: llvm-dwp %t.dwo -o %t.dwp
# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index \
-# RUN: -manaully-generate-unit-index %t.dwp 2>&1 | FileCheck %s
+# RUN: -manually-generate-unit-index %t.dwp 2>&1 | FileCheck %s
## Note: In order to check whether the type unit index is generated,
## there is no need to add the missing DIEs for the structure type of the type unit.
diff --git a/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s b/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s
index e554294..7ed77a1 100644
--- a/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s
+++ b/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s
@@ -3,7 +3,7 @@
# RUN: llvm-mc -triple x86_64-unknown-linux --filetype=obj --split-dwarf-file=%t.dwo -dwarf-version=5 %s -o %t.o
# RUN: llvm-dwp %t.dwo -o %t.dwp 2>&1
# RUN: llvm-dwarfdump -debug-macro -debug-cu-index %t.dwp | FileCheck -check-prefix=CHECK %s
-# RUN: llvm-dwarfdump -debug-macro -debug-cu-index -manaully-generate-unit-index %t.dwp | FileCheck -check-prefix=CHECK2 %s
+# RUN: llvm-dwarfdump -debug-macro -debug-cu-index -manually-generate-unit-index %t.dwp | FileCheck -check-prefix=CHECK2 %s
# CHECK-DAG: .debug_macro.dwo contents:
# CHECK: macro header: version = 0x0005, flags = 0x00, format = DWARF32
diff --git a/llvm/test/tools/llvm-dwp/X86/type_dedup.test b/llvm/test/tools/llvm-dwp/X86/type_dedup.test
index 78e50fe..9d89c62 100644
--- a/llvm/test/tools/llvm-dwp/X86/type_dedup.test
+++ b/llvm/test/tools/llvm-dwp/X86/type_dedup.test
@@ -1,10 +1,10 @@
RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %p/../Inputs/type_dedup/b.dwo -o %t
RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix=CHECK %s
-RUN: llvm-dwarfdump -v -manaully-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s
+RUN: llvm-dwarfdump -v -manually-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s
RUN: llvm-dwp %p/../Inputs/type_dedup/b.dwo -o %tb.dwp
RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %tb.dwp -o %t
RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix=CHECK %s
-RUN: llvm-dwarfdump -v -manaully-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s
+RUN: llvm-dwarfdump -v -manually-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s
a.cpp:
struct common { };
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s
index cc3c286..b6ebd93 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s
@@ -161,13 +161,13 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 139
-# CHECK-NEXT: Total Cycles: 42
+# CHECK-NEXT: Total Cycles: 40
# CHECK-NEXT: Total uOps: 139
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 3.31
-# CHECK-NEXT: IPC: 3.31
-# CHECK-NEXT: Block RThroughput: 25.8
+# CHECK-NEXT: uOps Per Cycle: 3.48
+# CHECK-NEXT: IPC: 3.48
+# CHECK-NEXT: Block RThroughput: 24.8
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -301,7 +301,7 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 1 1 0.25 vpxorq %xmm19, %xmm19, %xmm19
# CHECK-NEXT: 1 1 0.25 vpxord %ymm19, %ymm19, %ymm19
# CHECK-NEXT: 1 1 0.25 vpxorq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 1 1 0.50 vpxord %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 1 0 0.17 vpxord %zmm19, %zmm19, %zmm19
# CHECK-NEXT: 1 1 0.50 vpxorq %zmm19, %zmm19, %zmm19
# CHECK-NEXT: 1 0 0.17 vxorps %xmm4, %xmm4, %xmm5
# CHECK-NEXT: 1 0 0.17 vxorpd %xmm1, %xmm1, %xmm3
@@ -315,17 +315,17 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 1 1 0.25 vpxorq %xmm19, %xmm19, %xmm21
# CHECK-NEXT: 1 1 0.25 vpxord %ymm19, %ymm19, %ymm21
# CHECK-NEXT: 1 1 0.25 vpxorq %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 1 1 0.50 vpxord %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 1 0 0.17 vpxord %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 1 1 0.50 vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Register File statistics:
-# CHECK-NEXT: Total number of mappings created: 65
-# CHECK-NEXT: Max number of mappings used: 45
+# CHECK-NEXT: Total number of mappings created: 63
+# CHECK-NEXT: Max number of mappings used: 43
# CHECK: * Register File #1 -- Zn4FpPRF:
# CHECK-NEXT: Number of physical registers: 192
-# CHECK-NEXT: Total number of mappings created: 65
-# CHECK-NEXT: Max number of mappings used: 45
+# CHECK-NEXT: Total number of mappings created: 63
+# CHECK-NEXT: Max number of mappings used: 43
# CHECK: * Register File #2 -- Zn4IntegerPRF:
# CHECK-NEXT: Number of physical registers: 224
@@ -359,7 +359,7 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - - - - - - 25.00 25.00 27.00 26.00 - - - - - - - - - - -
+# CHECK-NEXT: - - - - - - - - 24.00 25.00 25.00 25.00 - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -453,9 +453,9 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpandnd %xmm19, %xmm19, %xmm19
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnq %xmm19, %xmm19, %xmm19
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnd %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpandnq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpandnd %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpandnq %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnq %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vpandnd %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpandnq %zmm19, %zmm19, %zmm19
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vandnpd %xmm1, %xmm1, %xmm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpandn %xmm3, %xmm3, %xmm5
@@ -478,174 +478,174 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vxorps %zmm2, %zmm2, %zmm2
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - pxor %mm2, %mm2
-# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - pxor %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - pxor %mm2, %mm2
+# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - pxor %xmm2, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm3
# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxord %xmm19, %xmm19, %xmm19
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm19
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm19
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm19
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorps %xmm4, %xmm4, %xmm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorps %ymm4, %ymm4, %ymm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm3
-# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vxorps %zmm4, %zmm4, %zmm5
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm3
+# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vxorps %zmm4, %zmm4, %zmm5
+# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm5
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxord %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 01
-
-# CHECK: [0,0] DR . . . . . . . .. subl %eax, %eax
-# CHECK-NEXT: [0,1] DR . . . . . . . .. subq %rax, %rax
-# CHECK-NEXT: [0,2] DR . . . . . . . .. xorl %eax, %eax
-# CHECK-NEXT: [0,3] DR . . . . . . . .. xorq %rax, %rax
-# CHECK-NEXT: [0,4] DeER . . . . . . . .. pcmpgtb %mm2, %mm2
-# CHECK-NEXT: [0,5] D=eER. . . . . . . .. pcmpgtd %mm2, %mm2
-# CHECK-NEXT: [0,6] .D=eER . . . . . . .. pcmpgtw %mm2, %mm2
-# CHECK-NEXT: [0,7] .DeE-R . . . . . . .. pcmpgtb %xmm2, %xmm2
-# CHECK-NEXT: [0,8] .DeE-R . . . . . . .. pcmpgtd %xmm2, %xmm2
-# CHECK-NEXT: [0,9] .DeE-R . . . . . . .. pcmpgtq %xmm2, %xmm2
-# CHECK-NEXT: [0,10] .D=eER . . . . . . .. pcmpgtw %xmm2, %xmm2
-# CHECK-NEXT: [0,11] .D---R . . . . . . .. vpcmpgtb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,12] . D--R . . . . . . .. vpcmpgtd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,13] . D--R . . . . . . .. vpcmpgtq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,14] . D--R . . . . . . .. vpcmpgtw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,15] . D---R . . . . . . .. vpcmpgtb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,16] . D---R . . . . . . .. vpcmpgtd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,17] . D---R . . . . . . .. vpcmpgtq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,18] . D--R . . . . . . .. vpcmpgtw %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,19] . D--R . . . . . . .. vpcmpgtb %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,20] . D--R . . . . . . .. vpcmpgtd %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,21] . D--R . . . . . . .. vpcmpgtq %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,22] . D--R . . . . . . .. vpcmpgtw %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,23] . D--R . . . . . . .. vpcmpgtb %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,24] . D--R . . . . . . .. vpcmpgtd %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,25] . D--R . . . . . . .. vpcmpgtq %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,26] . D--R . . . . . . .. vpcmpgtw %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,27] . DeER . . . . . . .. psubb %mm2, %mm2
-# CHECK-NEXT: [0,28] . D=eER . . . . . . .. psubd %mm2, %mm2
-# CHECK-NEXT: [0,29] . D==eER. . . . . . .. psubq %mm2, %mm2
-# CHECK-NEXT: [0,30] . D==eER . . . . . .. psubw %mm2, %mm2
-# CHECK-NEXT: [0,31] . DeE--R . . . . . .. psubb %xmm2, %xmm2
-# CHECK-NEXT: [0,32] . DeE--R . . . . . .. psubd %xmm2, %xmm2
-# CHECK-NEXT: [0,33] . DeE--R . . . . . .. psubq %xmm2, %xmm2
-# CHECK-NEXT: [0,34] . D=eE-R . . . . . .. psubw %xmm2, %xmm2
-# CHECK-NEXT: [0,35] . D----R . . . . . .. vpsubb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,36] . .D---R . . . . . .. vpsubd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,37] . .D---R . . . . . .. vpsubq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,38] . .D---R . . . . . .. vpsubw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,39] . .D----R . . . . . .. vpsubb %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,40] . .D----R . . . . . .. vpsubd %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,41] . .D----R . . . . . .. vpsubq %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,42] . . D---R . . . . . .. vpsubw %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,43] . . D---R . . . . . .. vpsubb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,44] . . D---R . . . . . .. vpsubd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,45] . . D---R . . . . . .. vpsubq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,46] . . D---R . . . . . .. vpsubw %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,47] . . D---R . . . . . .. vpsubb %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,48] . . D---R . . . . . .. vpsubd %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,49] . . D---R . . . . . .. vpsubq %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,50] . . D---R . . . . . .. vpsubw %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,51] . . DeE-R . . . . . .. vpsubb %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,52] . . D=eER . . . . . .. vpsubd %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,53] . . D==eER . . . . . .. vpsubq %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,54] . . D==eER. . . . . .. vpsubw %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,55] . . D===eER . . . . .. vpsubb %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,56] . . D====eER . . . . .. vpsubd %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,57] . . D=====eER . . . . .. vpsubq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,58] . . D======eER . . . . .. vpsubw %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,59] . . D=======eER. . . . .. vpsubb %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,60] . . D=======eER . . . .. vpsubd %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,61] . . D========eER . . . .. vpsubq %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,62] . . D=========eER . . . .. vpsubw %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,63] . . D==========eER . . . .. vpsubb %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,64] . . D===========eER. . . .. vpsubd %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,65] . . D===========eER. . . .. vpsubq %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,66] . . .D==========eER. . . .. vpsubw %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,67] . . .D==========eER. . . .. vpsubb %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,68] . . .D===========eER . . .. vpsubd %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,69] . . .D===========eER . . .. vpsubq %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,70] . . .D===========eER . . .. vpsubw %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,71] . . .D===========eER . . .. vpsubb %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,72] . . . D===========eER . . .. vpsubd %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,73] . . . D===========eER . . .. vpsubq %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,74] . . . D===========eER . . .. vpsubw %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,75] . . . DeE-----------R . . .. andnps %xmm0, %xmm0
-# CHECK-NEXT: [0,76] . . . DeE-----------R . . .. andnpd %xmm1, %xmm1
-# CHECK-NEXT: [0,77] . . . D-------------R . . .. vandnps %xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,78] . . . D------------R . . .. vandnpd %xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,79] . . . D------------R . . .. vandnps %ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,80] . . . D------------R . . .. vandnpd %ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,81] . . . DeE-----------R . . .. vandnps %zmm2, %zmm2, %zmm2
-# CHECK-NEXT: [0,82] . . . DeE-----------R . . .. vandnpd %zmm1, %zmm1, %zmm1
-# CHECK-NEXT: [0,83] . . . DeE-----------R . . .. pandn %mm2, %mm2
-# CHECK-NEXT: [0,84] . . . DeE----------R . . .. pandn %xmm2, %xmm2
-# CHECK-NEXT: [0,85] . . . D------------R . . .. vpandn %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,86] . . . D------------R . . .. vpandn %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,87] . . . D==========eER . . .. vpandnd %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,88] . . . D===========eER . . .. vpandnq %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,89] . . . D============eER. . .. vpandnd %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,90] . . . D============eER . .. vpandnq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,91] . . . D=============eER . .. vpandnd %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,92] . . . D==============eER . .. vpandnq %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,93] . . . D----------------R . .. vandnps %xmm2, %xmm2, %xmm5
-# CHECK-NEXT: [0,94] . . . D----------------R . .. vandnpd %xmm1, %xmm1, %xmm5
-# CHECK-NEXT: [0,95] . . . D----------------R . .. vpandn %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,96] . . . .D---------------R . .. vandnps %ymm2, %ymm2, %ymm5
-# CHECK-NEXT: [0,97] . . . .D---------------R . .. vandnpd %ymm1, %ymm1, %ymm5
-# CHECK-NEXT: [0,98] . . . .D---------------R . .. vpandn %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,99] . . . .DeE-------------R . .. vandnps %zmm2, %zmm2, %zmm5
-# CHECK-NEXT: [0,100] . . . .DeE-------------R . .. vandnpd %zmm1, %zmm1, %zmm5
-# CHECK-NEXT: [0,101] . . . .D==============eER . .. vpandnd %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,102] . . . . D=============eER . .. vpandnq %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,103] . . . . D=============eER . .. vpandnd %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,104] . . . . D==============eER. .. vpandnq %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,105] . . . . D==============eER. .. vpandnd %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,106] . . . . D==============eER. .. vpandnq %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,107] . . . . D=eE-------------R. .. xorps %xmm0, %xmm0
-# CHECK-NEXT: [0,108] . . . . DeE-------------R. .. xorpd %xmm1, %xmm1
-# CHECK-NEXT: [0,109] . . . . D---------------R. .. vxorps %xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,110] . . . . D---------------R. .. vxorpd %xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,111] . . . . D---------------R. .. vxorps %ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,112] . . . . D---------------R. .. vxorpd %ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,113] . . . . D=eE-------------R .. vxorps %zmm2, %zmm2, %zmm2
-# CHECK-NEXT: [0,114] . . . . DeE-------------R .. vxorpd %zmm1, %zmm1, %zmm1
-# CHECK-NEXT: [0,115] . . . . D======eE-------R .. pxor %mm2, %mm2
-# CHECK-NEXT: [0,116] . . . . D======eE-------R .. pxor %xmm2, %xmm2
-# CHECK-NEXT: [0,117] . . . . D---------------R .. vpxor %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,118] . . . . D---------------R .. vpxor %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,119] . . . . D============eE-R .. vpxord %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,120] . . . . D============eER .. vpxorq %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,121] . . . . D=============eER .. vpxord %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,122] . . . . D==============eER .. vpxorq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,123] . . . . D===============eER .. vpxord %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,124] . . . . D================eER.. vpxorq %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,125] . . . . D------------------R.. vxorps %xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,126] . . . . .D-----------------R.. vxorpd %xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,127] . . . . .D-----------------R.. vxorps %ymm4, %ymm4, %ymm5
-# CHECK-NEXT: [0,128] . . . . .D-----------------R.. vxorpd %ymm1, %ymm1, %ymm3
-# CHECK-NEXT: [0,129] . . . . .D====eE-----------R.. vxorps %zmm4, %zmm4, %zmm5
-# CHECK-NEXT: [0,130] . . . . .D=====eE----------R.. vxorpd %zmm1, %zmm1, %zmm3
-# CHECK-NEXT: [0,131] . . . . .D-----------------R.. vpxor %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,132] . . . . . D----------------R.. vpxor %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,133] . . . . . D===============eER. vpxord %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,134] . . . . . D===============eER. vpxorq %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,135] . . . . . D===============eER. vpxord %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,136] . . . . . D================eER vpxorq %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,137] . . . . . D================eER vpxord %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,138] . . . . . D===============eER vpxorq %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DR . . . . . . . . subl %eax, %eax
+# CHECK-NEXT: [0,1] DR . . . . . . . . subq %rax, %rax
+# CHECK-NEXT: [0,2] DR . . . . . . . . xorl %eax, %eax
+# CHECK-NEXT: [0,3] DR . . . . . . . . xorq %rax, %rax
+# CHECK-NEXT: [0,4] DeER . . . . . . . . pcmpgtb %mm2, %mm2
+# CHECK-NEXT: [0,5] D=eER. . . . . . . . pcmpgtd %mm2, %mm2
+# CHECK-NEXT: [0,6] .D=eER . . . . . . . pcmpgtw %mm2, %mm2
+# CHECK-NEXT: [0,7] .DeE-R . . . . . . . pcmpgtb %xmm2, %xmm2
+# CHECK-NEXT: [0,8] .DeE-R . . . . . . . pcmpgtd %xmm2, %xmm2
+# CHECK-NEXT: [0,9] .DeE-R . . . . . . . pcmpgtq %xmm2, %xmm2
+# CHECK-NEXT: [0,10] .D=eER . . . . . . . pcmpgtw %xmm2, %xmm2
+# CHECK-NEXT: [0,11] .D---R . . . . . . . vpcmpgtb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12] . D--R . . . . . . . vpcmpgtd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13] . D--R . . . . . . . vpcmpgtq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14] . D--R . . . . . . . vpcmpgtw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15] . D---R . . . . . . . vpcmpgtb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16] . D---R . . . . . . . vpcmpgtd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17] . D---R . . . . . . . vpcmpgtq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18] . D--R . . . . . . . vpcmpgtw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19] . D--R . . . . . . . vpcmpgtb %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,20] . D--R . . . . . . . vpcmpgtd %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,21] . D--R . . . . . . . vpcmpgtq %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,22] . D--R . . . . . . . vpcmpgtw %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,23] . D--R . . . . . . . vpcmpgtb %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,24] . D--R . . . . . . . vpcmpgtd %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,25] . D--R . . . . . . . vpcmpgtq %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,26] . D--R . . . . . . . vpcmpgtw %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,27] . DeER . . . . . . . psubb %mm2, %mm2
+# CHECK-NEXT: [0,28] . D=eER . . . . . . . psubd %mm2, %mm2
+# CHECK-NEXT: [0,29] . D==eER. . . . . . . psubq %mm2, %mm2
+# CHECK-NEXT: [0,30] . D==eER . . . . . . psubw %mm2, %mm2
+# CHECK-NEXT: [0,31] . DeE--R . . . . . . psubb %xmm2, %xmm2
+# CHECK-NEXT: [0,32] . DeE--R . . . . . . psubd %xmm2, %xmm2
+# CHECK-NEXT: [0,33] . DeE--R . . . . . . psubq %xmm2, %xmm2
+# CHECK-NEXT: [0,34] . D=eE-R . . . . . . psubw %xmm2, %xmm2
+# CHECK-NEXT: [0,35] . D----R . . . . . . vpsubb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,36] . .D---R . . . . . . vpsubd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,37] . .D---R . . . . . . vpsubq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,38] . .D---R . . . . . . vpsubw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,39] . .D----R . . . . . . vpsubb %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,40] . .D----R . . . . . . vpsubd %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,41] . .D----R . . . . . . vpsubq %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,42] . . D---R . . . . . . vpsubw %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,43] . . D---R . . . . . . vpsubb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,44] . . D---R . . . . . . vpsubd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45] . . D---R . . . . . . vpsubq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,46] . . D---R . . . . . . vpsubw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47] . . D---R . . . . . . vpsubb %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,48] . . D---R . . . . . . vpsubd %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,49] . . D---R . . . . . . vpsubq %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,50] . . D---R . . . . . . vpsubw %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,51] . . DeE-R . . . . . . vpsubb %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,52] . . D=eER . . . . . . vpsubd %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,53] . . D==eER . . . . . . vpsubq %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,54] . . D==eER. . . . . . vpsubw %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,55] . . D===eER . . . . . vpsubb %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,56] . . D====eER . . . . . vpsubd %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,57] . . D=====eER . . . . . vpsubq %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,58] . . D======eER . . . . . vpsubw %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,59] . . D=======eER. . . . . vpsubb %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,60] . . D=======eER . . . . vpsubd %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,61] . . D========eER . . . . vpsubq %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,62] . . D=========eER . . . . vpsubw %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,63] . . D==========eER . . . . vpsubb %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,64] . . D===========eER. . . . vpsubd %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,65] . . D===========eER. . . . vpsubq %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,66] . . .D==========eER. . . . vpsubw %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,67] . . .D==========eER. . . . vpsubb %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,68] . . .D===========eER . . . vpsubd %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,69] . . .D===========eER . . . vpsubq %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,70] . . .D===========eER . . . vpsubw %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,71] . . .D===========eER . . . vpsubb %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,72] . . . D===========eER . . . vpsubd %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,73] . . . D===========eER . . . vpsubq %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,74] . . . D===========eER . . . vpsubw %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,75] . . . DeE-----------R . . . andnps %xmm0, %xmm0
+# CHECK-NEXT: [0,76] . . . DeE-----------R . . . andnpd %xmm1, %xmm1
+# CHECK-NEXT: [0,77] . . . D-------------R . . . vandnps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,78] . . . D------------R . . . vandnpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,79] . . . D------------R . . . vandnps %ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,80] . . . D------------R . . . vandnpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,81] . . . DeE-----------R . . . vandnps %zmm2, %zmm2, %zmm2
+# CHECK-NEXT: [0,82] . . . DeE-----------R . . . vandnpd %zmm1, %zmm1, %zmm1
+# CHECK-NEXT: [0,83] . . . DeE-----------R . . . pandn %mm2, %mm2
+# CHECK-NEXT: [0,84] . . . DeE----------R . . . pandn %xmm2, %xmm2
+# CHECK-NEXT: [0,85] . . . D------------R . . . vpandn %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,86] . . . D------------R . . . vpandn %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,87] . . . D==========eER . . . vpandnd %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,88] . . . D===========eER . . . vpandnq %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,89] . . . D============eER. . . vpandnd %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,90] . . . D============eER . . vpandnq %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,91] . . . D=============eER . . vpandnd %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,92] . . . D==============eER . . vpandnq %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,93] . . . D----------------R . . vandnps %xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,94] . . . D----------------R . . vandnpd %xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,95] . . . D----------------R . . vpandn %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,96] . . . .D---------------R . . vandnps %ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,97] . . . .D---------------R . . vandnpd %ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,98] . . . .D---------------R . . vpandn %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,99] . . . .DeE-------------R . . vandnps %zmm2, %zmm2, %zmm5
+# CHECK-NEXT: [0,100] . . . .DeE-------------R . . vandnpd %zmm1, %zmm1, %zmm5
+# CHECK-NEXT: [0,101] . . . .D==============eER . . vpandnd %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,102] . . . . D=============eER . . vpandnq %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,103] . . . . D==============eER. . vpandnd %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,104] . . . . D==============eER. . vpandnq %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,105] . . . . D==============eER. . vpandnd %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,106] . . . . D==============eER. . vpandnq %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,107] . . . . D=eE-------------R. . xorps %xmm0, %xmm0
+# CHECK-NEXT: [0,108] . . . . DeE-------------R. . xorpd %xmm1, %xmm1
+# CHECK-NEXT: [0,109] . . . . D---------------R. . vxorps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,110] . . . . D---------------R. . vxorpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,111] . . . . D---------------R. . vxorps %ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,112] . . . . D----------------R . vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,113] . . . . D=eE-------------R . vxorps %zmm2, %zmm2, %zmm2
+# CHECK-NEXT: [0,114] . . . . DeE-------------R . vxorpd %zmm1, %zmm1, %zmm1
+# CHECK-NEXT: [0,115] . . . . D======eE-------R . pxor %mm2, %mm2
+# CHECK-NEXT: [0,116] . . . . D======eE-------R . pxor %xmm2, %xmm2
+# CHECK-NEXT: [0,117] . . . . D---------------R . vpxor %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,118] . . . . D---------------R . vpxor %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,119] . . . . D=============eER . vpxord %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,120] . . . . D=============eER . vpxorq %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,121] . . . . D==============eER . vpxord %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,122] . . . . D===============eER. vpxorq %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,123] . . . . D-----------------R. vpxord %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,124] . . . . D=====eE----------R. vpxorq %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,125] . . . . D-----------------R. vxorps %xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,126] . . . . .D----------------R. vxorpd %xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,127] . . . . .D----------------R. vxorps %ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,128] . . . . .D----------------R. vxorpd %ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,129] . . . . .D=====eE---------R. vxorps %zmm4, %zmm4, %zmm5
+# CHECK-NEXT: [0,130] . . . . .D=====eE---------R. vxorpd %zmm1, %zmm1, %zmm3
+# CHECK-NEXT: [0,131] . . . . .D-----------------R vpxor %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,132] . . . . . D----------------R vpxor %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,133] . . . . . D=====eE---------R vpxord %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,134] . . . . . D======eE--------R vpxorq %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,135] . . . . . D======eE--------R vpxord %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,136] . . . . . D======eE--------R vpxorq %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,137] . . . . . D----------------R vpxord %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,138] . . . . . D======eE-------R vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -757,7 +757,7 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 100. 1 1.0 1.0 13.0 vandnpd %zmm1, %zmm1, %zmm5
# CHECK-NEXT: 101. 1 15.0 0.0 0.0 vpandnd %xmm19, %xmm19, %xmm21
# CHECK-NEXT: 102. 1 14.0 0.0 0.0 vpandnq %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 103. 1 14.0 0.0 0.0 vpandnd %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 103. 1 15.0 1.0 0.0 vpandnd %ymm19, %ymm19, %ymm21
# CHECK-NEXT: 104. 1 15.0 1.0 0.0 vpandnq %ymm19, %ymm19, %ymm21
# CHECK-NEXT: 105. 1 15.0 1.0 0.0 vpandnd %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 106. 1 15.0 1.0 0.0 vpandnq %zmm19, %zmm19, %zmm21
@@ -766,31 +766,31 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 109. 1 0.0 0.0 15.0 vxorps %xmm2, %xmm2, %xmm2
# CHECK-NEXT: 110. 1 0.0 0.0 15.0 vxorpd %xmm1, %xmm1, %xmm1
# CHECK-NEXT: 111. 1 0.0 0.0 15.0 vxorps %ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 112. 1 0.0 0.0 15.0 vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 112. 1 0.0 0.0 16.0 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 113. 1 2.0 2.0 13.0 vxorps %zmm2, %zmm2, %zmm2
# CHECK-NEXT: 114. 1 1.0 1.0 13.0 vxorpd %zmm1, %zmm1, %zmm1
# CHECK-NEXT: 115. 1 7.0 7.0 7.0 pxor %mm2, %mm2
# CHECK-NEXT: 116. 1 7.0 7.0 7.0 pxor %xmm2, %xmm2
# CHECK-NEXT: 117. 1 0.0 0.0 15.0 vpxor %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 118. 1 0.0 0.0 15.0 vpxor %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 119. 1 13.0 1.0 1.0 vpxord %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 120. 1 13.0 0.0 0.0 vpxorq %xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 121. 1 14.0 0.0 0.0 vpxord %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 122. 1 15.0 0.0 0.0 vpxorq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 123. 1 16.0 0.0 0.0 vpxord %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 124. 1 17.0 0.0 0.0 vpxorq %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 125. 1 0.0 0.0 18.0 vxorps %xmm4, %xmm4, %xmm5
-# CHECK-NEXT: 126. 1 0.0 0.0 17.0 vxorpd %xmm1, %xmm1, %xmm3
-# CHECK-NEXT: 127. 1 0.0 0.0 17.0 vxorps %ymm4, %ymm4, %ymm5
-# CHECK-NEXT: 128. 1 0.0 0.0 17.0 vxorpd %ymm1, %ymm1, %ymm3
-# CHECK-NEXT: 129. 1 5.0 5.0 11.0 vxorps %zmm4, %zmm4, %zmm5
-# CHECK-NEXT: 130. 1 6.0 6.0 10.0 vxorpd %zmm1, %zmm1, %zmm3
+# CHECK-NEXT: 119. 1 14.0 2.0 0.0 vpxord %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 120. 1 14.0 0.0 0.0 vpxorq %xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 121. 1 15.0 0.0 0.0 vpxord %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 122. 1 16.0 0.0 0.0 vpxorq %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 123. 1 0.0 0.0 17.0 vpxord %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 124. 1 6.0 6.0 10.0 vpxorq %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 125. 1 0.0 0.0 17.0 vxorps %xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 126. 1 0.0 0.0 16.0 vxorpd %xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 127. 1 0.0 0.0 16.0 vxorps %ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 128. 1 0.0 0.0 16.0 vxorpd %ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 129. 1 6.0 6.0 9.0 vxorps %zmm4, %zmm4, %zmm5
+# CHECK-NEXT: 130. 1 6.0 6.0 9.0 vxorpd %zmm1, %zmm1, %zmm3
# CHECK-NEXT: 131. 1 0.0 0.0 17.0 vpxor %xmm3, %xmm3, %xmm5
# CHECK-NEXT: 132. 1 0.0 0.0 16.0 vpxor %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 133. 1 16.0 0.0 0.0 vpxord %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 134. 1 16.0 0.0 0.0 vpxorq %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 135. 1 16.0 0.0 0.0 vpxord %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 136. 1 17.0 1.0 0.0 vpxorq %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 137. 1 17.0 1.0 0.0 vpxord %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 138. 1 16.0 1.0 0.0 vpxorq %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 1 4.5 0.6 4.6 <total>
+# CHECK-NEXT: 133. 1 6.0 1.0 9.0 vpxord %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 134. 1 7.0 2.0 8.0 vpxorq %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 135. 1 7.0 2.0 8.0 vpxord %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 136. 1 7.0 2.0 8.0 vpxorq %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 137. 1 0.0 0.0 16.0 vpxord %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 138. 1 7.0 3.0 7.0 vpxorq %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 1 3.9 0.7 5.1 <total>
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index d00cf52..6858e82 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -272,7 +272,7 @@ static cl::opt<bool>
"expressed in bytes."),
cat(DwarfDumpCategory));
static cl::opt<bool> ManuallyGenerateUnitIndex(
- "manaully-generate-unit-index",
+ "manually-generate-unit-index",
cl::desc("if the input is dwp file, parse .debug_info "
"section and use it to populate "
"DW_SECT_INFO contributions in cu-index. "
diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp
index 1bdfa71..c5bc7b4 100644
--- a/llvm/tools/opt/optdriver.cpp
+++ b/llvm/tools/opt/optdriver.cpp
@@ -375,6 +375,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"fix-irreducible",
"expand-large-fp-convert",
"callbrprepare",
+ "scalarizer",
};
for (const auto &P : PassNamePrefix)
if (Pass.starts_with(P))
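
The optdriver change above adds "scalarizer" to the prefix list consulted by shouldPinPassToLegacyPM: a pass stays on the legacy pass manager if its name starts with any listed prefix. A reduced standalone sketch (C++20) of that check, with std::string_view in place of StringRef and a shortened, illustrative name list:

#include <array>
#include <string_view>

// Reduced stand-in for the prefix table in optdriver.cpp.
static bool shouldPinPassToLegacyPM(std::string_view Pass) {
  constexpr std::array<std::string_view, 3> PassNamePrefix = {
      "fix-irreducible", "callbrprepare", "scalarizer"};
  for (std::string_view P : PassNamePrefix)
    if (Pass.starts_with(P))  // prefix match, not exact match
      return true;
  return false;
}

int main() { return shouldPinPassToLegacyPM("scalarizer") ? 0 : 1; }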
diff --git a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
index f48f4f1..a7950e1 100644
--- a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
+++ b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Bitcode/BitcodeAnalyzer.h"
#include "llvm/ProfileData/CtxInstrContextNode.h"
#include "llvm/ProfileData/PGOCtxProfReader.h"
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index 6280963..ad5508f 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -729,6 +729,72 @@ define void @foo() {
EXPECT_EQ(UndefStruct->getNumElements(), 2u);
}
+TEST_F(SandboxIRTest, GlobalValue) {
+ parseIR(C, R"IR(
+declare external void @bar()
+define void @foo() {
+ call void @bar()
+ ret void
+}
+)IR");
+ Function &LLVMF = *M->getFunction("foo");
+ auto *LLVMBB = &*LLVMF.begin();
+ auto LLVMIt = LLVMBB->begin();
+ auto *LLVMCall = cast<llvm::CallInst>(&*LLVMIt++);
+ auto *LLVMGV = cast<llvm::GlobalValue>(LLVMCall->getCalledOperand());
+ sandboxir::Context Ctx(C);
+
+ auto &F = *Ctx.createFunction(&LLVMF);
+ auto *BB = &*F.begin();
+ auto It = BB->begin();
+ auto *Call = cast<sandboxir::CallInst>(&*It++);
+ [[maybe_unused]] auto *Ret = cast<sandboxir::ReturnInst>(&*It++);
+
+ // Check classof(), creation, getFunction(), getBasicBlock().
+ auto *GV = cast<sandboxir::GlobalValue>(Call->getCalledOperand());
+ // Check getAddressSpace().
+ EXPECT_EQ(GV->getAddressSpace(), LLVMGV->getAddressSpace());
+ // Check hasGlobalUnnamedAddr().
+ EXPECT_EQ(GV->hasGlobalUnnamedAddr(), LLVMGV->hasGlobalUnnamedAddr());
+ // Check hasAtLeastLocalUnnamedAddr().
+ EXPECT_EQ(GV->hasAtLeastLocalUnnamedAddr(),
+ LLVMGV->hasAtLeastLocalUnnamedAddr());
+ // Check getUnnamedAddr().
+ EXPECT_EQ(GV->getUnnamedAddr(), LLVMGV->getUnnamedAddr());
+ // Check setUnnamedAddr().
+ auto OrigUnnamedAddr = GV->getUnnamedAddr();
+ auto NewUnnamedAddr = sandboxir::GlobalValue::UnnamedAddr::Global;
+ EXPECT_NE(NewUnnamedAddr, OrigUnnamedAddr);
+ GV->setUnnamedAddr(NewUnnamedAddr);
+ EXPECT_EQ(GV->getUnnamedAddr(), NewUnnamedAddr);
+ GV->setUnnamedAddr(OrigUnnamedAddr);
+ EXPECT_EQ(GV->getUnnamedAddr(), OrigUnnamedAddr);
+ // Check getMinUnnamedAddr().
+ EXPECT_EQ(
+ sandboxir::GlobalValue::getMinUnnamedAddr(OrigUnnamedAddr,
+ NewUnnamedAddr),
+ llvm::GlobalValue::getMinUnnamedAddr(OrigUnnamedAddr, NewUnnamedAddr));
+ // Check hasComdat().
+ EXPECT_EQ(GV->hasComdat(), LLVMGV->hasComdat());
+ // Check getVisibility().
+ EXPECT_EQ(GV->getVisibility(), LLVMGV->getVisibility());
+ // Check hasDefaultVisibility().
+ EXPECT_EQ(GV->hasDefaultVisibility(), LLVMGV->hasDefaultVisibility());
+ // Check hasHiddenVisibility().
+ EXPECT_EQ(GV->hasHiddenVisibility(), LLVMGV->hasHiddenVisibility());
+ // Check hasProtectedVisibility().
+ EXPECT_EQ(GV->hasProtectedVisibility(), LLVMGV->hasProtectedVisibility());
+ // Check setVisibility().
+ auto OrigVisibility = GV->getVisibility();
+ auto NewVisibility =
+ sandboxir::GlobalValue::VisibilityTypes::ProtectedVisibility;
+ EXPECT_NE(NewVisibility, OrigVisibility);
+ GV->setVisibility(NewVisibility);
+ EXPECT_EQ(GV->getVisibility(), NewVisibility);
+ GV->setVisibility(OrigVisibility);
+ EXPECT_EQ(GV->getVisibility(), OrigVisibility);
+}
+
TEST_F(SandboxIRTest, BlockAddress) {
parseIR(C, R"IR(
define void @foo(ptr %ptr) {
diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp
index a1a4117..6454c54 100644
--- a/llvm/unittests/SandboxIR/TrackerTest.cpp
+++ b/llvm/unittests/SandboxIR/TrackerTest.cpp
@@ -1521,6 +1521,43 @@ define void @foo(i64 %i0, i64 %i1, float %f0, float %f1) {
checkCmpInst(Ctx, ICmp);
}
+TEST_F(TrackerTest, GlobalValueSetters) {
+ parseIR(C, R"IR(
+define void @foo() {
+ call void @foo()
+ ret void
+}
+)IR");
+ Function &LLVMF = *M->getFunction("foo");
+ sandboxir::Context Ctx(C);
+
+ auto &F = *Ctx.createFunction(&LLVMF);
+ auto *BB = &*F.begin();
+ auto *Call = cast<sandboxir::CallInst>(&*BB->begin());
+
+ auto *GV = cast<sandboxir::GlobalValue>(Call->getCalledOperand());
+ // Check setUnnamedAddr().
+ auto OrigUnnamedAddr = GV->getUnnamedAddr();
+ auto NewUnnamedAddr = sandboxir::GlobalValue::UnnamedAddr::Global;
+ EXPECT_NE(NewUnnamedAddr, OrigUnnamedAddr);
+ Ctx.save();
+ GV->setUnnamedAddr(NewUnnamedAddr);
+ EXPECT_EQ(GV->getUnnamedAddr(), NewUnnamedAddr);
+ Ctx.revert();
+ EXPECT_EQ(GV->getUnnamedAddr(), OrigUnnamedAddr);
+
+ // Check setVisibility().
+ auto OrigVisibility = GV->getVisibility();
+ auto NewVisibility =
+ sandboxir::GlobalValue::VisibilityTypes::ProtectedVisibility;
+ EXPECT_NE(NewVisibility, OrigVisibility);
+ Ctx.save();
+ GV->setVisibility(NewVisibility);
+ EXPECT_EQ(GV->getVisibility(), NewVisibility);
+ Ctx.revert();
+ EXPECT_EQ(GV->getVisibility(), OrigVisibility);
+}
+
TEST_F(TrackerTest, SetVolatile) {
parseIR(C, R"IR(
define void @foo(ptr %arg0, i8 %val) {
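
The new GlobalValueSetters test exercises the Ctx.save()/Ctx.revert() flow: while tracking is active, each setter records how to undo itself, and revert replays those records in reverse. A minimal standalone sketch of that pattern, assuming nothing about the real sandboxir::Tracker beyond what the test shows; Tracker, TrackedGlobal, and the int visibility field are illustrative stand-ins only.

#include <cassert>
#include <functional>
#include <vector>

class Tracker {
  std::vector<std::function<void()>> RevertActions;
  bool Tracking = false;

public:
  void save() { Tracking = true; }
  void track(std::function<void()> Undo) {
    if (Tracking)
      RevertActions.push_back(std::move(Undo));
  }
  void revert() {
    // Undo in reverse order of the recorded changes.
    for (auto It = RevertActions.rbegin(); It != RevertActions.rend(); ++It)
      (*It)();
    RevertActions.clear();
    Tracking = false;
  }
};

class TrackedGlobal {
  int Visibility = 0;
  Tracker &Tr;

public:
  explicit TrackedGlobal(Tracker &T) : Tr(T) {}
  int getVisibility() const { return Visibility; }
  void setVisibility(int V) {
    // Record how to undo this change before applying it.
    Tr.track([this, Old = Visibility] { Visibility = Old; });
    Visibility = V;
  }
};

int main() {
  Tracker Tr;
  TrackedGlobal GV(Tr);
  Tr.save();
  GV.setVisibility(2);
  assert(GV.getVisibility() == 2);
  Tr.revert();
  assert(GV.getVisibility() == 0);
  return 0;
}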
diff --git a/llvm/utils/gn/build/toolchain/target_flags.gni b/llvm/utils/gn/build/toolchain/target_flags.gni
index af8adcd..cbfa229 100644
--- a/llvm/utils/gn/build/toolchain/target_flags.gni
+++ b/llvm/utils/gn/build/toolchain/target_flags.gni
@@ -54,6 +54,6 @@ if (current_os == "android") {
target_flags += [ "--target=$llvm_current_triple" ]
}
-if (current_cpu == "x86") {
+if (current_cpu == "x86" && current_os != "win") {
target_flags += [ "-m32" ]
}
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn
index d8c75a0..02c2048 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn
@@ -10,16 +10,16 @@ group("lib") {
if (current_os == "linux" || current_os == "android") {
deps += [ "//compiler-rt/lib/ubsan_minimal" ]
}
- if (current_os != "win" && current_os != "baremetal") {
+ if (current_os != "baremetal") {
deps += [
"//compiler-rt/lib/asan",
- "//compiler-rt/lib/ubsan",
+ "//compiler-rt/lib/profile",
]
+ }
+ if (current_os != "win" && current_os != "baremetal") {
+ deps += [ "//compiler-rt/lib/ubsan" ]
if (current_cpu == "x64" || current_cpu == "arm64") {
deps += [ "//compiler-rt/lib/tsan/rtl" ]
}
}
- if (current_os != "baremetal") {
- deps += [ "//compiler-rt/lib/profile" ]
- }
}
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
index cf30875..42467c2 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
@@ -60,12 +60,12 @@ if (current_toolchain == host_toolchain) {
"asan_thread.h",
"asan_win.cpp",
]
- if (target_os != "mac" && target_os != "win") {
+ if (current_os != "mac" && current_os != "win") {
asan_sources += [ "asan_interceptors_vfork.S" ]
}
config("asan_config") {
cflags = []
- if (target_os != "win") {
+ if (current_os != "win") {
cflags += [ "-ftls-model=initial-exec" ]
} else {
ldflags = [ "/OPT:NOICF" ]
@@ -76,11 +76,11 @@ if (current_toolchain == host_toolchain) {
# FIXME: add_sanitizer_rt_version_list (cf hwasan)
# FIXME: need libclang_rt.asan*.a.syms?
- if (target_os == "android") {
+ if (current_os == "android") {
ldflags = [ "-Wl,-z,global" ]
}
- if (target_os == "mac") {
+ if (current_os == "mac") {
# The -U flags below correspond to the add_weak_symbols() calls in CMake.
ldflags = [
"-lc++",
@@ -145,7 +145,7 @@ if (current_toolchain == host_toolchain) {
configs -= [ "//llvm/utils/gn/build:llvm_code" ]
configs += [ "//llvm/utils/gn/build:crt_code" ]
sources = [ "asan_rtl_static.cpp" ]
- if (target_os != "mac" && target_os != "win") {
+ if (current_os != "mac" && current_os != "win") {
sources += [ "asan_rtl_x86_64.S" ]
}
}
@@ -183,7 +183,7 @@ if (current_toolchain == host_toolchain) {
}
}
- if (current_os != "mac") {
+ if (current_os != "mac" && current_os != "win") {
static_library("asan_static_library") {
output_dir = crt_current_out_dir
output_name = "clang_rt.asan$crt_current_target_suffix"
@@ -232,7 +232,8 @@ if (current_toolchain == host_toolchain) {
if (current_os == "win") {
static_library("asan_static_runtime_thunk") {
output_dir = crt_current_out_dir
- output_name = "clang_rt.asan_static_runtime_thunk$crt_current_target_suffix"
+ output_name =
+ "clang_rt.asan_static_runtime_thunk$crt_current_target_suffix"
configs -= [ "//llvm/utils/gn/build:llvm_code" ]
configs += [ "//llvm/utils/gn/build:crt_code" ]
complete_static_lib = true
@@ -277,11 +278,11 @@ if (current_toolchain == host_toolchain) {
deps = [ ":asan_shared_library" ]
if (current_os == "win") {
deps += [
- ":asan_static_runtime_thunk",
":asan_dynamic_runtime_thunk",
+ ":asan_static_runtime_thunk",
]
}
- if (current_os != "mac") {
+ if (current_os != "mac" && current_os != "win") {
deps += [
":asan_cxx",
":asan_preinit",
diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index b907e66..a6d1204 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -496,6 +496,7 @@ if (current_toolchain == default_toolchain) {
"__fwd/format.h",
"__fwd/fstream.h",
"__fwd/functional.h",
+ "__fwd/get.h",
"__fwd/ios.h",
"__fwd/istream.h",
"__fwd/mdspan.h",
@@ -512,6 +513,7 @@ if (current_toolchain == default_toolchain) {
"__fwd/string_view.h",
"__fwd/subrange.h",
"__fwd/tuple.h",
+ "__fwd/variant.h",
"__fwd/vector.h",
"__hash_table",
"__ios/fpos.h",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn
index e296a7b..bed2fff 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn
@@ -18,6 +18,7 @@ static_library("Coroutines") {
"CoroFrame.cpp",
"CoroSplit.cpp",
"Coroutines.cpp",
+ "MaterializationUtils.cpp",
"SpillUtils.cpp",
"SuspendCrossingInfo.cpp",
]
diff --git a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake
index 0a486c1..c07c55b 100644
--- a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake
+++ b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake
@@ -19,7 +19,7 @@ macro(mlir_configure_python_dev_packages)
set(_python_development_component Development.Module)
find_package(Python3 ${LLVM_MINIMUM_PYTHON_VERSION}
- COMPONENTS Interpreter ${_python_development_component} NumPy REQUIRED)
+ COMPONENTS Interpreter ${_python_development_component} REQUIRED)
unset(_python_development_component)
message(STATUS "Found python include dirs: ${Python3_INCLUDE_DIRS}")
message(STATUS "Found python libraries: ${Python3_LIBRARIES}")
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index c2785f3..f80d279 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -671,18 +671,27 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> {
matchAndRewrite(WMMAOp op, WMMAOpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Location loc = op.getLoc();
- Type outType = typeConverter->convertType(op.getDestD().getType());
+ auto outType =
+ typeConverter->convertType<VectorType>(op.getDestD().getType());
+ if (!outType)
+ return rewriter.notifyMatchFailure(op, "type conversion failed");
if (chipset.majorVersion != 11 && chipset.majorVersion != 12)
return op->emitOpError("WMMA only supported on gfx11 and gfx12");
+ // The WMMA operations represent vectors of bf16s as vectors of i16s, so we
+ // need to bitcast bfloats to i16 and then bitcast them back.
+ VectorType rawOutType = outType;
+ if (outType.getElementType().isBF16())
+ rawOutType = outType.clone(rewriter.getI16Type());
+
std::optional<StringRef> maybeIntrinsic = wmmaOpToIntrinsic(op, chipset);
if (!maybeIntrinsic.has_value())
return op.emitOpError("no intrinsic matching WMMA on the given chipset");
OperationState loweredOp(loc, *maybeIntrinsic);
- loweredOp.addTypes(outType);
+ loweredOp.addTypes(rawOutType);
SmallVector<Value, 4> operands;
wmmaPushInputOperand(rewriter, loc, typeConverter, op.getUnsignedA(),
@@ -694,7 +703,12 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> {
loweredOp.addOperands(operands);
Operation *lowered = rewriter.create(loweredOp);
- rewriter.replaceOp(op, lowered->getResults());
+
+ Operation *maybeCastBack = lowered;
+ if (rawOutType != outType)
+ maybeCastBack =
+ rewriter.create<LLVM::BitcastOp>(loc, outType, lowered->getResult(0));
+ rewriter.replaceOp(op, maybeCastBack->getResults());
return success();
}
@@ -1033,15 +1047,6 @@ struct ConvertAMDGPUToROCDLPass
void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
RewritePatternSet &patterns,
Chipset chipset) {
- converter.addConversion([](BFloat16Type t) -> Type {
- return IntegerType::get(t.getContext(), 16);
- });
- converter.addConversion([&converter](VectorType t) -> std::optional<Type> {
- if (!t.getElementType().isBF16())
- return std::nullopt;
- return converter.convertType(t.clone(IntegerType::get(t.getContext(), 16)));
- });
-
patterns
.add<RawBufferOpLowering<RawBufferLoadOp, ROCDL::RawPtrBufferLoadOp>,
RawBufferOpLowering<RawBufferStoreOp, ROCDL::RawPtrBufferStoreOp>,
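
The WMMA lowering above keeps bf16 results in i16 lanes while calling the intrinsic and bitcasts them back to bf16 afterwards. A standalone sketch (plain C++, not MLIR) of why that round trip is lossless, modeling bf16 as the high 16 bits of an IEEE-754 float; floatToBF16Bits and bf16BitsToFloat are illustrative helpers, not part of the patch.

#include <cstdint>
#include <cstring>
#include <cstdio>

static uint16_t floatToBF16Bits(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return static_cast<uint16_t>(bits >> 16);  // truncate, no rounding
}

static float bf16BitsToFloat(uint16_t h) {
  uint32_t bits = static_cast<uint32_t>(h) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

int main() {
  // A bf16 payload carried in an i16 lane keeps its exact bit pattern, so
  // bitcasting to i16 before the intrinsic and back afterwards loses nothing.
  uint16_t lane = floatToBF16Bits(1.5f);
  std::printf("bf16 bits 0x%04x -> %f\n", lane, bf16BitsToFloat(lane));
  return 0;
}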
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 2992671..fc3e1fc 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -26,6 +26,7 @@
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MathToROCDL/MathToROCDL.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
@@ -290,6 +291,7 @@ struct LowerGpuOpsToROCDLOpsPass
populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
*maybeChipset);
populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
+ populateMathToLLVMConversionPatterns(converter, llvmPatterns);
cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
@@ -332,7 +334,11 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
-
+ // These ops are legal for f16 and f32 types.
+ target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) {
+ return any_of(op->getOperandTypes(),
+ llvm::IsaPred<Float16Type, Float32Type>);
+ });
// TODO: Remove once we support replacing non-root ops.
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}
diff --git a/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp
index 5bc6d4e..91702ce 100644
--- a/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp
+++ b/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp
@@ -934,12 +934,10 @@ transform::TransformState::applyTransform(TransformOpInterface transform) {
assert(scopeIt != regionStack.rend() &&
"could not find region scope for handle");
RegionScope *scope = *scopeIt;
- for (Operation *user : handle.getUsers()) {
- if (user != scope->currentTransform &&
- !happensBefore(user, scope->currentTransform))
- return false;
- }
- return true;
+ return llvm::all_of(handle.getUsers(), [&](Operation *user) {
+ return user == scope->currentTransform ||
+ happensBefore(user, scope->currentTransform);
+ });
};
transform::ErrorCheckingTrackingListener trackingListener(*this, transform,
config);
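
The TransformInterfaces.cpp hunk above replaces an explicit early-exit loop over handle.getUsers() with a single llvm::all_of predicate. A minimal standalone sketch of the same refactor, using std::all_of in place of llvm::all_of; User, happensBefore, and currentTransform here are placeholder stand-ins, not the real MLIR entities.

#include <algorithm>
#include <vector>

struct User { int id; };

static bool happensBefore(const User &a, const User &b) {
  return a.id < b.id;  // stand-in ordering check
}

static bool allUsersDominated(const std::vector<User> &users,
                              const User &currentTransform) {
  // The loop form returned false on the first offending user; all_of
  // expresses the same early-exit behavior as one predicate.
  return std::all_of(users.begin(), users.end(), [&](const User &user) {
    return user.id == currentTransform.id ||
           happensBefore(user, currentTransform);
  });
}

int main() {
  std::vector<User> users = {{1}, {2}, {3}};
  return allUsersDominated(users, {3}) ? 0 : 1;
}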
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index df95e5db..4d2d738 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -150,6 +150,10 @@ if(MLIR_ENABLE_CUDA_RUNNER)
list(APPEND MLIR_TEST_DEPENDS mlir_cuda_runtime)
endif()
+if(MLIR_ENABLE_EXECUTION_ENGINE)
+ list(APPEND MLIR_TEST_DEPENDS mlir-capi-execution-engine-test)
+endif()
+
if(MLIR_ENABLE_ROCM_RUNNER)
list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)
endif()
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir b/mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir
index 1a4ef33..7b14480 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir
@@ -15,9 +15,11 @@ func.func @mfma_to_rocdl(%arg0 : vector<16xf16>, %arg1 : vector<8xf32>, %arg2 :
amdgpu.wmma %arg0 * %arg0 + %arg0 {subwordOffset = 1 : i32}: vector<16xf16>, vector<16xf16>, vector<16xf16>
// CHECK: rocdl.wmma.f16.16x16x16.f16{{.*}}: (vector<16xf16>, vector<16xf16>, vector<8xf16>, i1) -> vector<8xf16>
amdgpu.wmma %arg0 * %arg0 + %arg4 {subwordOffset = 0 : i32}: vector<16xf16>, vector<16xf16>, vector<8xf16>
- // CHECK: rocdl.wmma.bf16.16x16x16.bf16{{.*}}: (vector<16xi16>, vector<16xi16>, vector<16xi16>, i1) -> vector<16xi16>
+ // CHECK: %[[raw_bf16x16:.+]] = rocdl.wmma.bf16.16x16x16.bf16{{.*}}: (vector<16xi16>, vector<16xi16>, vector<16xi16>, i1) -> vector<16xi16>
+ // CHECK-NEXT: llvm.bitcast %[[raw_bf16x16]] : vector<16xi16> to vector<16xbf16>
amdgpu.wmma %arg3 * %arg3 + %arg3 {subwordOffset = 1 : i32}: vector<16xbf16>, vector<16xbf16>, vector<16xbf16>
- // CHECK: rocdl.wmma.bf16.16x16x16.bf16{{.*}}: (vector<16xi16>, vector<16xi16>, vector<8xi16>, i1) -> vector<8xi16>
+ // CHECK: %[[raw_bf16x8:.+]] = rocdl.wmma.bf16.16x16x16.bf16{{.*}}: (vector<16xi16>, vector<16xi16>, vector<8xi16>, i1) -> vector<8xi16>
+ // CHECK-NEXT: llvm.bitcast %[[raw_bf16x8]] : vector<8xi16> to vector<8xbf16>
amdgpu.wmma %arg3 * %arg3 + %arg5 {subwordOffset = 0 : i32}: vector<16xbf16>, vector<16xbf16>, vector<8xbf16>
// CHECK: rocdl.wmma.i32.16x16x16.iu8{{.*}}: (i1, vector<4xi32>, i1, vector<4xi32>, vector<4xi32>, i1) -> vector<4xi32>
amdgpu.wmma %arg6 * %arg6 + %arg7 {clamp}: vector<16xi8>, vector<16xi8>, vector<4xi32>
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index 56b65be..eb065cb 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -132,6 +132,68 @@ gpu.module @test_module {
// -----
gpu.module @test_module {
+ // CHECK-LABEL: func @gpu_sqrt
+ func.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+ %result16 = math.sqrt %arg_f16 : f16
+ // CHECK: llvm.intr.sqrt(%{{.*}}) : (f16) -> f16
+ %result32 = math.sqrt %arg_f32 : f32
+ // CHECK: llvm.intr.sqrt(%{{.*}}) : (f32) -> f32
+ %result64 = math.sqrt %arg_f64 : f64
+ // CHECK: llvm.intr.sqrt(%{{.*}}) : (f64) -> f64
+ func.return %result16, %result32, %result64 : f16, f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
+ // CHECK-LABEL: func @gpu_fabs
+ func.func @gpu_fabs(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+ %result16 = math.absf %arg_f16 : f16
+ // CHECK: llvm.intr.fabs(%{{.*}}) : (f16) -> f16
+ %result32 = math.absf %arg_f32 : f32
+ // CHECK: llvm.intr.fabs(%{{.*}}) : (f32) -> f32
+ %result64 = math.absf %arg_f64 : f64
+ // CHECK: llvm.intr.fabs(%{{.*}}) : (f64) -> f64
+ func.return %result16, %result32, %result64 : f16, f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
+ // CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
+ // CHECK-LABEL: func @gpu_exp
+ func.func @gpu_exp(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+ %result16 = math.exp %arg_f16 : f16
+ // CHECK: llvm.intr.exp(%{{.*}}) : (f16) -> f16
+ %result32 = math.exp %arg_f32 : f32
+ // CHECK: llvm.intr.exp(%{{.*}}) : (f32) -> f32
+ %result64 = math.exp %arg_f64 : f64
+ // CHECK: llvm.call @__ocml_exp_f64(%{{.*}}) : (f64) -> f64
+ func.return %result16, %result32, %result64 : f16, f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
+ // CHECK: llvm.func @__ocml_log_f64(f64) -> f64
+ // CHECK-LABEL: func @gpu_log
+ func.func @gpu_log(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+ %result16 = math.log %arg_f16 : f16
+ // CHECK: llvm.intr.log(%{{.*}}) : (f16) -> f16
+ %result32 = math.log %arg_f32 : f32
+ // CHECK: llvm.intr.log(%{{.*}}) : (f32) -> f32
+ %result64 = math.log %arg_f64 : f64
+ // CHECK: llvm.call @__ocml_log_f64(%{{.*}}) : (f64) -> f64
+ func.return %result16, %result32, %result64 : f16, f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
// CHECK: llvm.func @__ocml_cbrt_f32(f32) -> f32
// CHECK: llvm.func @__ocml_cbrt_f64(f64) -> f64
// CHECK-LABEL: func @gpu_cbrt
@@ -445,22 +507,22 @@ gpu.module @test_module {
// -----
-// Test that the bf16 type is lowered away on this target.
+// Test that the bf16 type is passed through to LLVM.
gpu.module @test_module {
// CHECK-LABEL: func @bf16_id
func.func @bf16_id(%arg0 : bf16) -> bf16 {
- // CHECK-SAME: (%[[ARG0:.+]]: i16)
- // CHECK-SAME: -> i16
- // CHECK: return %[[ARG0]] : i16
+ // CHECK-SAME: (%[[ARG0:.+]]: bf16)
+ // CHECK-SAME: -> bf16
+ // CHECK: return %[[ARG0]] : bf16
func.return %arg0 : bf16
}
// CHECK-LABEL: func @bf16x4_id
func.func @bf16x4_id(%arg0 : vector<4xbf16>) -> vector<4xbf16> {
- // CHECK-SAME: (%[[ARG0:.+]]: vector<4xi16>)
- // CHECK-SAME: -> vector<4xi16>
- // CHECK: return %[[ARG0]] : vector<4xi16>
+ // CHECK-SAME: (%[[ARG0:.+]]: vector<4xbf16>)
+ // CHECK-SAME: -> vector<4xbf16>
+ // CHECK: return %[[ARG0]] : vector<4xbf16>
func.return %arg0 : vector<4xbf16>
}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c931898..4124897 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -6004,6 +6004,7 @@ cc_library(
":LLVMCommonConversion",
":LLVMDialect",
":MathDialect",
+ ":MathToLLVM",
":MathToROCDL",
":MemRefDialect",
":MemRefToLLVM",