aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2024-06-06 13:23:38 -0700
committerFangrui Song <i@maskray.me>2024-06-06 13:23:38 -0700
commit683ca4ab2cce926ca945b5eed9fa0bb3cf575de9 (patch)
treec32c6df233afdf9469e20f99733cde3f552e49de
parentcf44857e7bce6b2defe3f174e0134e2bb7a0ac9d (diff)
parentfbcb92ca017ee7fbf84be808701133fbdf3b1c59 (diff)
downloadllvm-683ca4ab2cce926ca945b5eed9fa0bb3cf575de9.zip
llvm-683ca4ab2cce926ca945b5eed9fa0bb3cf575de9.tar.gz
llvm-683ca4ab2cce926ca945b5eed9fa0bb3cf575de9.tar.bz2
Created using spr 1.3.5-bogner [skip ci]
-rw-r--r--.github/workflows/containers/github-action-ci/stage1.Dockerfile2
-rw-r--r--bolt/include/bolt/Core/GDBIndex.h61
-rw-r--r--bolt/lib/Core/CMakeLists.txt1
-rw-r--r--bolt/lib/Core/GDBIndex.cpp185
-rw-r--r--clang-tools-extra/clang-tidy/misc/CMakeLists.txt1
-rw-r--r--clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp9
-rw-r--r--clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp26
-rw-r--r--clang-tools-extra/docs/ReleaseNotes.rst6
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp6
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/redundant-member-init.cpp16
-rw-r--r--clang/docs/LanguageExtensions.rst24
-rw-r--r--clang/docs/ReleaseNotes.rst3
-rw-r--r--clang/include/clang/AST/OpenACCClause.h2
-rw-r--r--clang/include/clang/AST/Type.h14
-rw-r--r--clang/include/clang/Basic/BuiltinsAMDGPU.def2
-rw-r--r--clang/include/clang/Basic/Cuda.h1
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td6
-rw-r--r--clang/include/clang/Basic/OpenACCClauses.def22
-rw-r--r--clang/include/clang/Basic/TokenKinds.def2
-rw-r--r--clang/include/clang/Basic/riscv_vector.td3
-rw-r--r--clang/include/clang/Lex/DependencyDirectivesScanner.h3
-rw-r--r--clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h3
-rw-r--r--clang/lib/AST/CMakeLists.txt1
-rw-r--r--clang/lib/AST/Interp/ByteCodeExprGen.cpp102
-rw-r--r--clang/lib/AST/Interp/Context.cpp15
-rw-r--r--clang/lib/AST/Interp/Context.h2
-rw-r--r--clang/lib/AST/Interp/Descriptor.cpp1
-rw-r--r--clang/lib/AST/Interp/Disasm.cpp3
-rw-r--r--clang/lib/AST/Interp/Function.cpp3
-rw-r--r--clang/lib/AST/Interp/Interp.cpp39
-rw-r--r--clang/lib/AST/Interp/Interp.h107
-rw-r--r--clang/lib/AST/Interp/InterpFrame.cpp1
-rw-r--r--clang/lib/AST/Interp/InterpStack.cpp1
-rw-r--r--clang/lib/AST/Interp/InterpStack.h3
-rw-r--r--clang/lib/AST/Interp/MemberPointer.cpp76
-rw-r--r--clang/lib/AST/Interp/MemberPointer.h112
-rw-r--r--clang/lib/AST/Interp/Opcodes.td18
-rw-r--r--clang/lib/AST/Interp/Pointer.cpp1
-rw-r--r--clang/lib/AST/Interp/Pointer.h1
-rw-r--r--clang/lib/AST/Interp/PrimType.cpp1
-rw-r--r--clang/lib/AST/Interp/PrimType.h8
-rw-r--r--clang/lib/AST/OpenACCClause.cpp2
-rw-r--r--clang/lib/AST/ParentMap.cpp16
-rw-r--r--clang/lib/AST/TextNodeDumper.cpp21
-rw-r--r--clang/lib/AST/Type.cpp38
-rw-r--r--clang/lib/Analysis/CFG.cpp50
-rw-r--r--clang/lib/Basic/Cuda.cpp1
-rw-r--r--clang/lib/Basic/Targets/LoongArch.h2
-rw-r--r--clang/lib/Basic/Targets/NVPTX.cpp1
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp1
-rw-r--r--clang/lib/CodeGen/Targets/AMDGPU.cpp6
-rw-r--r--clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp5
-rw-r--r--clang/lib/Format/UnwrappedLineParser.cpp6
-rw-r--r--clang/lib/Frontend/FrontendActions.cpp4
-rw-r--r--clang/lib/Interpreter/IncrementalParser.cpp3
-rw-r--r--clang/lib/Interpreter/Interpreter.cpp165
-rw-r--r--clang/lib/Lex/DependencyDirectivesScanner.cpp22
-rw-r--r--clang/lib/Parse/ParseStmt.cpp5
-rw-r--r--clang/lib/Sema/Scope.cpp4
-rw-r--r--clang/lib/Sema/SemaAMDGPU.cpp6
-rw-r--r--clang/lib/Sema/SemaDecl.cpp3
-rw-r--r--clang/lib/Sema/SemaExpr.cpp35
-rw-r--r--clang/lib/Sema/SemaExprCXX.cpp6
-rw-r--r--clang/lib/Sema/SemaInit.cpp19
-rw-r--r--clang/lib/Sema/SemaOpenACC.cpp1191
-rw-r--r--clang/lib/Sema/TreeTransform.h12
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExprEngine.cpp56
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp4
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp5
-rw-r--r--clang/test/AST/Interp/arrays.cpp14
-rw-r--r--clang/test/AST/Interp/cxx23.cpp22
-rw-r--r--clang/test/AST/Interp/eval-order.cpp4
-rw-r--r--clang/test/AST/Interp/literals.cpp9
-rw-r--r--clang/test/AST/Interp/memberpointers.cpp197
-rw-r--r--clang/test/AST/ast-dump-default-init-json.cpp6
-rw-r--r--clang/test/AST/ast-dump-default-init.cpp2
-rw-r--r--clang/test/AST/ast-print-openacc-loop-construct.cpp9
-rw-r--r--clang/test/Analysis/cxx-uninitialized-object.cpp12
-rw-r--r--clang/test/Analysis/lifetime-extended-regions.cpp10
-rw-r--r--clang/test/CXX/drs/cwg16xx.cpp2
-rw-r--r--clang/test/CXX/drs/cwg18xx.cpp19
-rw-r--r--clang/test/CXX/special/class.temporary/p6.cpp34
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c264
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c264
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c528
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c528
-rw-r--r--clang/test/CodeGen/voidptr-vaarg.c478
-rw-r--r--clang/test/CodeGenCUDA/cuda-builtin-vars.cu24
-rw-r--r--clang/test/CodeGenCXX/inline-then-fold-variadics.cpp181
-rw-r--r--clang/test/CodeGenCXX/pointers-to-data-members.cpp1
-rw-r--r--clang/test/CodeGenCXX/template-param-objects-linkage.cpp1
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-features.cl2
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl1
-rw-r--r--clang/test/Driver/aarch64-oryon-1.c19
-rw-r--r--clang/test/Driver/amdgpu-macros.cl1
-rw-r--r--clang/test/Driver/amdgpu-mcpu.cl2
-rw-r--r--clang/test/Interpreter/pretty-print.c8
-rw-r--r--clang/test/Misc/target-invalid-cpu-note.c8
-rw-r--r--clang/test/SemaCXX/attr-weak.cpp1
-rw-r--r--clang/test/SemaCXX/builtin-is-bitwise-cloneable-fsanitize.cpp34
-rw-r--r--clang/test/SemaCXX/builtin-is-bitwise-cloneable.cpp8
-rw-r--r--clang/test/SemaCXX/constexpr-default-arg.cpp4
-rw-r--r--clang/test/SemaCXX/cxx11-default-member-initializers.cpp74
-rw-r--r--clang/test/SemaCXX/eval-crashes.cpp6
-rw-r--r--clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp1
-rw-r--r--clang/test/SemaObjCXX/arc-type-traits.mm9
-rw-r--r--clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c6
-rw-r--r--clang/test/SemaOpenACC/loop-construct-private-clause.c132
-rw-r--r--clang/test/SemaOpenACC/loop-construct-private-clause.cpp155
-rw-r--r--clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl6
-rw-r--r--clang/unittests/AST/Interp/toAPValue.cpp46
-rw-r--r--clang/unittests/Format/FormatTest.cpp11
-rw-r--r--clang/unittests/Lex/DependencyDirectivesScannerTest.cpp82
-rw-r--r--clang/unittests/Lex/PPDependencyDirectivesTest.cpp3
-rwxr-xr-xclang/www/cxx_dr_status.html2
-rw-r--r--compiler-rt/lib/builtins/atomic.c31
-rw-r--r--cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp2
-rw-r--r--flang/cmake/modules/AddFlangOffloadRuntime.cmake1
-rw-r--r--flang/docs/Intrinsics.md31
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h2
-rw-r--r--flang/include/flang/Optimizer/Builder/Runtime/Command.h5
-rw-r--r--flang/include/flang/Optimizer/Dialect/FIRAttr.td30
-rw-r--r--flang/include/flang/Optimizer/Dialect/FIROps.td35
-rw-r--r--flang/include/flang/Optimizer/Transforms/Passes.h3
-rw-r--r--flang/include/flang/Optimizer/Transforms/Passes.td1
-rw-r--r--flang/include/flang/Runtime/command.h4
-rw-r--r--flang/include/flang/Runtime/magic-numbers.h5
-rw-r--r--flang/include/flang/Tools/CLOptions.inc5
-rw-r--r--flang/lib/Evaluate/intrinsics.cpp12
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.cpp16
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.h8
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.h53
-rw-r--r--flang/lib/Lower/OpenMP/DataSharingProcessor.cpp2
-rw-r--r--flang/lib/Lower/OpenMP/DataSharingProcessor.h4
-rw-r--r--flang/lib/Lower/OpenMP/OpenMP.cpp2
-rw-r--r--flang/lib/Lower/OpenMP/ReductionProcessor.cpp8
-rw-r--r--flang/lib/Lower/OpenMP/Utils.cpp8
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp39
-rw-r--r--flang/lib/Optimizer/Builder/MutableBox.cpp6
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/Command.cpp13
-rw-r--r--flang/lib/Optimizer/Dialect/FIRAttr.cpp4
-rw-r--r--flang/lib/Optimizer/Dialect/FIROps.cpp73
-rw-r--r--flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp23
-rw-r--r--flang/runtime/command.cpp26
-rw-r--r--flang/runtime/stat.h1
-rw-r--r--flang/test/Fir/loop03.fir17
-rw-r--r--flang/test/Lower/HLFIR/assumed-rank-inquiries.f90383
-rw-r--r--flang/test/Lower/Intrinsics/getcwd-function.f9023
-rw-r--r--flang/test/Lower/Intrinsics/getcwd-optional.f9029
-rw-r--r--flang/test/Lower/Intrinsics/getcwd.f9044
-rw-r--r--flang/test/Lower/OpenMP/critical.f9024
-rw-r--r--flang/test/Lower/OpenMP/map-component-ref.f9041
-rw-r--r--flang/test/Semantics/getcwd.f9035
-rw-r--r--libc/config/gpu/entrypoints.txt11
-rw-r--r--libc/config/linux/aarch64/entrypoints.txt17
-rw-r--r--libc/config/linux/x86_64/entrypoints.txt17
-rw-r--r--libc/docs/c23.rst18
-rw-r--r--libc/docs/math/index.rst28
-rw-r--r--libc/hdr/types/CMakeLists.txt9
-rw-r--r--libc/hdr/types/atexithandler_t.h22
-rw-r--r--libc/spec/stdc.td17
-rw-r--r--libc/src/__support/CPP/CMakeLists.txt1
-rw-r--r--libc/src/__support/CPP/type_traits.h1
-rw-r--r--libc/src/__support/CPP/type_traits/aligned_storage.h27
-rw-r--r--libc/src/__support/fixedvector.h18
-rw-r--r--libc/src/math/CMakeLists.txt14
-rw-r--r--libc/src/math/fmaxf16.h20
-rw-r--r--libc/src/math/fmaximum_mag_numf16.h20
-rw-r--r--libc/src/math/fmaximum_magf16.h20
-rw-r--r--libc/src/math/fmaximum_numf16.h20
-rw-r--r--libc/src/math/fmaximumf16.h20
-rw-r--r--libc/src/math/fminf16.h20
-rw-r--r--libc/src/math/fminimum_mag_numf16.h20
-rw-r--r--libc/src/math/fminimum_magf16.h20
-rw-r--r--libc/src/math/fminimum_numf16.h20
-rw-r--r--libc/src/math/fminimumf16.h20
-rw-r--r--libc/src/math/generic/CMakeLists.txt185
-rw-r--r--libc/src/math/generic/fmaxf16.cpp19
-rw-r--r--libc/src/math/generic/fmaximum_mag_numf16.cpp19
-rw-r--r--libc/src/math/generic/fmaximum_magf16.cpp19
-rw-r--r--libc/src/math/generic/fmaximum_numf16.cpp19
-rw-r--r--libc/src/math/generic/fmaximumf16.cpp19
-rw-r--r--libc/src/math/generic/fminf16.cpp19
-rw-r--r--libc/src/math/generic/fminimum_mag_numf16.cpp19
-rw-r--r--libc/src/math/generic/fminimum_magf16.cpp19
-rw-r--r--libc/src/math/generic/fminimum_numf16.cpp19
-rw-r--r--libc/src/math/generic/fminimumf16.cpp19
-rw-r--r--libc/src/math/generic/nextafterf16.cpp19
-rw-r--r--libc/src/math/generic/nextdownf16.cpp19
-rw-r--r--libc/src/math/generic/nexttowardf16.cpp21
-rw-r--r--libc/src/math/generic/nextupf16.cpp19
-rw-r--r--libc/src/math/nextafterf16.h20
-rw-r--r--libc/src/math/nextdownf16.h20
-rw-r--r--libc/src/math/nexttowardf16.h20
-rw-r--r--libc/src/math/nextupf16.h20
-rw-r--r--libc/src/stdlib/CMakeLists.txt32
-rw-r--r--libc/src/stdlib/at_quick_exit.cpp22
-rw-r--r--libc/src/stdlib/at_quick_exit.h20
-rw-r--r--libc/src/stdlib/atexit.cpp79
-rw-r--r--libc/src/stdlib/atexit.h7
-rw-r--r--libc/src/stdlib/exit_handler.cpp42
-rw-r--r--libc/src/stdlib/exit_handler.h53
-rw-r--r--libc/src/stdlib/quick_exit.cpp6
-rw-r--r--libc/src/sys/epoll/linux/CMakeLists.txt3
-rw-r--r--libc/src/sys/epoll/linux/epoll_pwait.cpp3
-rw-r--r--libc/src/sys/epoll/linux/epoll_pwait2.cpp3
-rw-r--r--libc/src/sys/epoll/linux/epoll_wait.cpp3
-rw-r--r--libc/test/CMakeLists.txt12
-rw-r--r--libc/test/src/__support/CMakeLists.txt5
-rw-r--r--libc/test/src/__support/CPP/type_traits_test.cpp9
-rw-r--r--libc/test/src/__support/fixedvector_test.cpp27
-rw-r--r--libc/test/src/math/smoke/CMakeLists.txt279
-rw-r--r--libc/test/src/math/smoke/FMaxTest.h10
-rw-r--r--libc/test/src/math/smoke/FMaximumMagNumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMaximumMagTest.h15
-rw-r--r--libc/test/src/math/smoke/FMaximumNumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMaximumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMinTest.h10
-rw-r--r--libc/test/src/math/smoke/FMinimumMagNumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMinimumMagTest.h15
-rw-r--r--libc/test/src/math/smoke/FMinimumNumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMinimumTest.h15
-rw-r--r--libc/test/src/math/smoke/NextAfterTest.h10
-rw-r--r--libc/test/src/math/smoke/NextTowardTest.h11
-rw-r--r--libc/test/src/math/smoke/fmaxf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fmaximum_mag_numf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fmaximum_magf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fmaximum_numf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fmaximumf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminimum_mag_numf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminimum_magf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminimum_numf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminimumf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/nextafterf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/nextdownf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/nexttowardf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/nextupf16_test.cpp13
-rw-r--r--libc/test/src/stdlib/CMakeLists.txt15
-rw-r--r--libc/test/src/stdlib/at_quick_exit_test.cpp90
-rw-r--r--libclc/CMakeLists.txt2
-rw-r--r--libcxx/include/__type_traits/datasizeof.h2
-rw-r--r--libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp4
-rw-r--r--lld/ELF/Writer.cpp2
-rw-r--r--lld/test/ELF/linkerscript/sections-nonalloc.s5
-rwxr-xr-xlldb/examples/python/crashlog.py35
-rw-r--r--lldb/examples/python/crashlog_scripted_process.py13
-rw-r--r--lldb/include/lldb/Expression/DWARFExpression.h13
-rw-r--r--lldb/include/lldb/Expression/DWARFExpressionList.h10
-rw-r--r--lldb/source/Core/ValueObject.cpp2
-rw-r--r--lldb/source/Core/ValueObjectVariable.cpp8
-rw-r--r--lldb/source/Expression/DWARFExpression.cpp906
-rw-r--r--lldb/source/Expression/DWARFExpressionList.cpp28
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp421
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h197
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp5
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp67
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h15
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h9
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h3
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp117
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h36
-rw-r--r--lldb/source/Symbol/Function.cpp17
-rw-r--r--lldb/source/Target/RegisterContextUnwind.cpp23
-rw-r--r--lldb/source/Target/StackFrame.cpp19
-rw-r--r--lldb/source/Target/Statistics.cpp10
-rw-r--r--lldb/source/Target/ThreadPlanStepOverRange.cpp2
-rw-r--r--lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp7
-rw-r--r--lldb/test/API/lang/c/inlines/Makefile3
-rw-r--r--lldb/test/API/lang/c/inlines/TestRedefinitionsInInlines.py72
-rw-r--r--lldb/test/API/lang/c/inlines/main.c23
-rw-r--r--lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py2
-rw-r--r--lldb/test/API/tools/lldb-server/TestPtyServer.py1
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test4
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test2
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test2
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test2
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test4
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg5
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test2
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test36
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/x86/simple-template-names-context.cpp44
-rw-r--r--lldb/unittests/Expression/DWARFExpressionTest.cpp78
-rw-r--r--llvm/docs/AMDGPUUsage.rst15
-rw-r--r--llvm/docs/ReleaseNotes.rst4
-rw-r--r--llvm/include/llvm/Analysis/CodeMetrics.h9
-rw-r--r--llvm/include/llvm/Analysis/LoopInfo.h3
-rw-r--r--llvm/include/llvm/Analysis/VecFuncs.def36
-rw-r--r--llvm/include/llvm/AsmParser/LLToken.h1
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h2
-rw-r--r--llvm/include/llvm/CodeGen/BasicTTIImpl.h3
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h8
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h41
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h2
-rw-r--r--llvm/include/llvm/CodeGen/MachineFunction.h9
-rw-r--r--llvm/include/llvm/CodeGen/SDPatternMatch.h55
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAG.h3
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h2
-rw-r--r--llvm/include/llvm/IR/CallingConv.h3
-rw-r--r--llvm/include/llvm/IR/InstrTypes.h17
-rw-r--r--llvm/include/llvm/IR/IntrinsicInst.h19
-rw-r--r--llvm/include/llvm/IR/Operator.h2
-rw-r--r--llvm/include/llvm/IR/RuntimeLibcalls.def5
-rw-r--r--llvm/include/llvm/InitializePasses.h1
-rw-r--r--llvm/include/llvm/MC/MCSymbolWasm.h11
-rw-r--r--llvm/include/llvm/ProfileData/MemProf.h14
-rw-r--r--llvm/include/llvm/Support/Error.h5
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Combine.td37
-rw-r--r--llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td1
-rw-r--r--llvm/include/llvm/Target/TargetSelectionDAG.td6
-rw-r--r--llvm/include/llvm/TargetParser/AArch64TargetParser.h5
-rw-r--r--llvm/include/llvm/TargetParser/TargetParser.h1
-rw-r--r--llvm/include/llvm/Transforms/IPO/Attributor.h12
-rw-r--r--llvm/include/llvm/Transforms/IPO/ExpandVariadics.h40
-rw-r--r--llvm/include/llvm/Transforms/Utils/UnrollLoop.h7
-rw-r--r--llvm/lib/Analysis/CodeMetrics.cpp53
-rw-r--r--llvm/lib/Analysis/LoopInfo.cpp20
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp11
-rw-r--r--llvm/lib/Analysis/VectorUtils.cpp1
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp1
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp4
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp86
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp2
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp16
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp24
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp11
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp3
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp17
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp10
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp32
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp2
-rw-r--r--llvm/lib/IR/AsmWriter.cpp3
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp4
-rw-r--r--llvm/lib/MC/WasmObjectWriter.cpp5
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp2
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp1
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp1
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/ProfileData/MemProfReader.cpp2
-rw-r--r--llvm/lib/Support/VirtualFileSystem.cpp61
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td5
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64Processors.td30
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp33
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedOryon.td1659
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp7
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp19
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/GCNProcessors.td6
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp103
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp103
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h3
-rw-r--r--llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp22
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp12
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.h1
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.h7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp32
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXUtilities.cpp57
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXUtilities.h16
-rw-r--r--llvm/lib/Target/NVPTX/NVVMIntrRange.cpp197
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp16
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp11
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td6
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td6
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp16
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td73
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp47
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.td1
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp149
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h16
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp6
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp12
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp3
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp9
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp31
-rw-r--r--llvm/lib/Target/X86/X86InstrAMX.td2
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXType.cpp16
-rw-r--r--llvm/lib/Target/X86/X86LowerTileCopy.cpp23
-rw-r--r--llvm/lib/Target/X86/X86MachineFunctionInfo.h12
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td18
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td4
-rw-r--r--llvm/lib/TargetParser/Host.cpp1
-rw-r--r--llvm/lib/TargetParser/TargetParser.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp17
-rw-r--r--llvm/lib/Transforms/IPO/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/IPO/ExpandVariadics.cpp1012
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp251
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp25
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp6
-rw-r--r--llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp56
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp8
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp57
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/LoopRotationUtils.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp47
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp17
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h7
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp3
-rw-r--r--llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll221
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/combine-vscale.mir113
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll12
-rw-r--r--llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll112
-rw-r--r--llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll126
-rw-r--r--llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll17
-rw-r--r--llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll20
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll14
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll14
-rw-r--r--llvm/test/CodeGen/AArch64/zext-to-tbl.ll195
-rw-r--r--llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll545
-rw-r--r--llvm/test/CodeGen/AMDGPU/fmaximum3.ll600
-rw-r--r--llvm/test/CodeGen/AMDGPU/fminimum3.ll600
-rw-r--r--llvm/test/CodeGen/AMDGPU/llc-pipeline.ll5
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll226
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll3449
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll6331
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll88
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll3449
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll6331
-rw-r--r--llvm/test/CodeGen/AMDGPU/occupancy-levels.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir165
-rw-r--r--llvm/test/CodeGen/AMDGPU/unsupported-calls.ll19
-rw-r--r--llvm/test/CodeGen/ARM/neon_vabd.ll890
-rw-r--r--llvm/test/CodeGen/ARM/vaba.ll193
-rw-r--r--llvm/test/CodeGen/ARM/vabd.ll194
-rw-r--r--llvm/test/CodeGen/LoongArch/fp16-promote.ll326
-rw-r--r--llvm/test/CodeGen/LoongArch/sextw-removal.ll32
-rw-r--r--llvm/test/CodeGen/NVPTX/intr-range.ll88
-rw-r--r--llvm/test/CodeGen/NVPTX/intrinsic-old.ll51
-rw-r--r--llvm/test/CodeGen/PowerPC/toc-data-common.ll136
-rw-r--r--llvm/test/CodeGen/PowerPC/toc-data.ll18
-rw-r--r--llvm/test/CodeGen/PowerPC/tocdata-firm-alignment.ll24
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir10
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vmsbf.ll16
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vmsif.ll16
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vmsof.ll16
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vsll.ll16
-rw-r--r--llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll16
-rw-r--r--llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll4
-rw-r--r--llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll36
-rw-r--r--llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll2
-rw-r--r--llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll25
-rw-r--r--llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll484
-rw-r--r--llvm/test/CodeGen/WebAssembly/simd-arith.ll220
-rw-r--r--llvm/test/CodeGen/WebAssembly/vararg-frame.ll526
-rw-r--r--llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll79
-rw-r--r--llvm/test/CodeGen/X86/llvm.tan.ll70
-rw-r--r--llvm/test/CodeGen/X86/vec-libcalls.ll202
-rw-r--r--llvm/test/DebugInfo/X86/sdag-order.ll46
-rw-r--r--llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test1
-rw-r--r--llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll23
-rw-r--r--llvm/test/MC/AMDGPU/gfx1150_asm_features.s1
-rw-r--r--llvm/test/MC/RISCV/relocations.s2
-rw-r--r--llvm/test/MC/WebAssembly/reloc-pic64.s3
-rw-r--r--llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml7
-rw-r--r--llvm/test/Other/can-execute.txt1
-rw-r--r--llvm/test/Other/lit-unicode.txt1
-rw-r--r--llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll443
-rw-r--r--llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll232
-rw-r--r--llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll214
-rw-r--r--llvm/test/Transforms/ExpandVariadics/indirect-calls.ll59
-rw-r--r--llvm/test/Transforms/ExpandVariadics/intrinsics.ll120
-rw-r--r--llvm/test/Transforms/ExpandVariadics/invoke.ll89
-rw-r--r--llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll153
-rw-r--r--llvm/test/Transforms/ExpandVariadics/pass-indirect.ll59
-rw-r--r--llvm/test/Transforms/ExpandVariadics/pass-integers.ll345
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll72
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-of-and-x.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-of-or-x.ll26
-rw-r--r--llvm/test/Transforms/InstCombine/select.ll28
-rw-r--r--llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll6
-rw-r--r--llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll109
-rw-r--r--llvm/test/Transforms/LoopUnroll/convergent.controlled.ll562
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll113
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll115
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll46
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll25
-rw-r--r--llvm/test/tools/llvm-cov/gcov/intermediate-format.test2
-rw-r--r--llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s82
-rw-r--r--llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s50
-rw-r--r--llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s18
-rw-r--r--llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s50
-rw-r--r--llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s34
-rw-r--r--llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s18
-rw-r--r--llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll5
-rw-r--r--llvm/test/tools/llvm-rc/windres-prefix.test2
-rw-r--r--llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test9
-rw-r--r--llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll8
-rw-r--r--llvm/test/tools/split-file/output-is-special.test1
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp1
-rw-r--r--llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp13
-rw-r--r--llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp5
-rw-r--r--llvm/unittests/Support/VirtualFileSystemTest.cpp5
-rw-r--r--llvm/unittests/TargetParser/Host.cpp3
-rw-r--r--llvm/unittests/TargetParser/TargetParserTest.cpp16
-rw-r--r--llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn1
-rw-r--r--llvm/utils/lit/lit/llvm/config.py5
-rw-r--r--mlir/include/mlir-c/IR.h3
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h38
-rw-r--r--mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td4
-rw-r--r--mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h5
-rw-r--r--mlir/include/mlir/Target/LLVMIR/Export.h9
-rw-r--r--mlir/lib/Bindings/Python/IRCore.cpp13
-rw-r--r--mlir/lib/CAPI/IR/IR.cpp4
-rw-r--r--mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp16
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp46
-rw-r--r--mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp15
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp123
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp165
-rw-r--r--mlir/lib/Dialect/Utils/IndexingUtils.cpp2
-rw-r--r--mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp143
-rw-r--r--mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp2
-rw-r--r--mlir/lib/IR/AsmPrinter.cpp11
-rw-r--r--mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir139
-rw-r--r--mlir/test/Dialect/Tensor/rewrite-as-constant.mlir135
-rw-r--r--mlir/test/Dialect/Vector/vector-transfer-flatten.mlir43
-rw-r--r--mlir/test/IR/array-of-attr.mlir4
-rw-r--r--mlir/test/python/ir/blocks.py32
-rw-r--r--offload/DeviceRTL/CMakeLists.txt2
-rw-r--r--offload/include/PluginManager.h27
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp29
-rw-r--r--offload/plugins-nextgen/common/include/PluginInterface.h34
-rw-r--r--offload/plugins-nextgen/common/src/PluginInterface.cpp72
-rw-r--r--offload/plugins-nextgen/cuda/src/rtl.cpp55
-rw-r--r--offload/plugins-nextgen/host/src/rtl.cpp4
-rw-r--r--offload/src/PluginManager.cpp235
-rw-r--r--offload/src/omptarget.cpp2
-rw-r--r--offload/test/offloading/ompx_bare_shfl_down_sync.cpp2
-rw-r--r--openmp/CMakeLists.txt6
-rw-r--r--third-party/unittest/googletest/include/gtest/internal/gtest-port.h2
-rw-r--r--utils/bazel/llvm-project-overlay/libc/BUILD.bazel7
-rw-r--r--utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel1
-rw-r--r--utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel2
-rw-r--r--utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl3
-rw-r--r--utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel3
-rw-r--r--utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel1
-rw-r--r--utils/bazel/llvm-project-overlay/llvm/BUILD.bazel8
566 files changed, 25499 insertions, 21850 deletions
diff --git a/.github/workflows/containers/github-action-ci/stage1.Dockerfile b/.github/workflows/containers/github-action-ci/stage1.Dockerfile
index fbc4548..8c6bcf4 100644
--- a/.github/workflows/containers/github-action-ci/stage1.Dockerfile
+++ b/.github/workflows/containers/github-action-ci/stage1.Dockerfile
@@ -37,7 +37,7 @@ RUN cmake -B ./build -G Ninja ./llvm \
-DLLVM_ENABLE_RUNTIMES="compiler-rt" \
-DCMAKE_INSTALL_PREFIX="$LLVM_SYSROOT" \
-DLLVM_ENABLE_PROJECTS="bolt;clang;lld;clang-tools-extra" \
- -DLLVM_DISTRIBUTION_COMPONENTS="lld;compiler-rt;clang-format" \
+ -DLLVM_DISTRIBUTION_COMPONENTS="lld;compiler-rt;clang-format;scan-build" \
-DCLANG_DEFAULT_LINKER="lld" \
-DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=/llvm-project-llvmorg-$LLVM_VERSION/llvm
diff --git a/bolt/include/bolt/Core/GDBIndex.h b/bolt/include/bolt/Core/GDBIndex.h
new file mode 100644
index 0000000..6604c2a
--- /dev/null
+++ b/bolt/include/bolt/Core/GDBIndex.h
@@ -0,0 +1,61 @@
+//===-- bolt/Core/GDBIndex.h - GDB Index support ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file contains declaration of classes required for generation of
+/// .gdb_index section.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_CORE_GDB_INDEX_H
+#define BOLT_CORE_GDB_INDEX_H
+
+#include "bolt/Core/BinaryContext.h"
+#include <vector>
+
+namespace llvm {
+namespace bolt {
+
+class GDBIndex {
+public:
+ /// Contains information about TU so we can write out correct entries in GDB
+ /// index.
+ struct GDBIndexTUEntry {
+ uint64_t UnitOffset;
+ uint64_t TypeHash;
+ uint64_t TypeDIERelativeOffset;
+ };
+
+private:
+ BinaryContext &BC;
+
+ /// Entries for GDB Index Types CU List.
+ using GDBIndexTUEntryType = std::vector<GDBIndexTUEntry>;
+ GDBIndexTUEntryType GDBIndexTUEntryVector;
+
+public:
+ GDBIndex(BinaryContext &BC) : BC(BC) {}
+
+ std::mutex GDBIndexMutex;
+
+ /// Adds an GDBIndexTUEntry if .gdb_index section exists.
+ void addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry);
+
+ /// Rewrite .gdb_index section if present.
+ void updateGdbIndexSection(const CUOffsetMap &CUMap, const uint32_t NumCUs,
+ DebugARangesSectionWriter &ARangesSectionWriter);
+
+ /// Returns all entries needed for Types CU list.
+ const GDBIndexTUEntryType &getGDBIndexTUEntryVector() const {
+ return GDBIndexTUEntryVector;
+ }
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt
index 441df9f..873cf67 100644
--- a/bolt/lib/Core/CMakeLists.txt
+++ b/bolt/lib/Core/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_library(LLVMBOLTCore
DynoStats.cpp
Exceptions.cpp
FunctionLayout.cpp
+ GDBIndex.cpp
HashUtilities.cpp
JumpTable.cpp
MCPlusBuilder.cpp
diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp
new file mode 100644
index 0000000..9e6d241
--- /dev/null
+++ b/bolt/lib/Core/GDBIndex.cpp
@@ -0,0 +1,185 @@
+//===- bolt/Core/GDBIndex.cpp - GDB Index support ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/GDBIndex.h"
+
+using namespace llvm::bolt;
+using namespace llvm::support::endian;
+
+void GDBIndex::addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry) {
+ std::lock_guard<std::mutex> Lock(GDBIndexMutex);
+ if (!BC.getGdbIndexSection())
+ return;
+ GDBIndexTUEntryVector.emplace_back(Entry);
+}
+
+void GDBIndex::updateGdbIndexSection(
+ const CUOffsetMap &CUMap, const uint32_t NumCUs,
+ DebugARangesSectionWriter &ARangesSectionWriter) {
+ if (!BC.getGdbIndexSection())
+ return;
+
+ // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
+ // for .gdb_index section format.
+
+ StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();
+
+ const char *Data = GdbIndexContents.data();
+
+ // Parse the header.
+ const uint32_t Version = read32le(Data);
+ if (Version != 7 && Version != 8) {
+ errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
+ exit(1);
+ }
+
+ // Some .gdb_index generators use file offsets while others use section
+ // offsets. Hence we can only rely on offsets relative to each other,
+ // and ignore their absolute values.
+ const uint32_t CUListOffset = read32le(Data + 4);
+ const uint32_t CUTypesOffset = read32le(Data + 8);
+ const uint32_t AddressTableOffset = read32le(Data + 12);
+ const uint32_t SymbolTableOffset = read32le(Data + 16);
+ const uint32_t ConstantPoolOffset = read32le(Data + 20);
+ Data += 24;
+
+ // Map CUs offsets to indices and verify existing index table.
+ std::map<uint32_t, uint32_t> OffsetToIndexMap;
+ const uint32_t CUListSize = CUTypesOffset - CUListOffset;
+ const uint32_t TUListSize = AddressTableOffset - CUTypesOffset;
+ const unsigned NUmCUsEncoded = CUListSize / 16;
+ unsigned MaxDWARFVersion = BC.DwCtx->getMaxVersion();
+ unsigned NumDWARF5TUs =
+ getGDBIndexTUEntryVector().size() - BC.DwCtx->getNumTypeUnits();
+ bool SkipTypeUnits = false;
+ // For DWARF5 Types are in .debug_info.
+ // LLD doesn't generate Types CU List, and in CU list offset
+ // only includes CUs.
+ // GDB 11+ includes only CUs in CU list and generates Types
+ // list.
+ // GDB 9 includes CUs and TUs in CU list and generates TYpes
+ // list. The NumCUs is CUs + TUs, so need to modify the check.
+ // For split-dwarf
+ // GDB-11, DWARF5: TU units from dwo are not included.
+ // GDB-11, DWARF4: TU units from dwo are included.
+ if (MaxDWARFVersion >= 5)
+ SkipTypeUnits = !TUListSize ? true
+ : ((NUmCUsEncoded + NumDWARF5TUs) ==
+ BC.DwCtx->getNumCompileUnits());
+
+ if (!((CUListSize == NumCUs * 16) ||
+ (CUListSize == (NumCUs + NumDWARF5TUs) * 16))) {
+ errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
+ exit(1);
+ }
+ DenseSet<uint64_t> OriginalOffsets;
+ for (unsigned Index = 0, Units = BC.DwCtx->getNumCompileUnits();
+ Index < Units; ++Index) {
+ const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
+ if (SkipTypeUnits && CU->isTypeUnit())
+ continue;
+ const uint64_t Offset = read64le(Data);
+ Data += 16;
+ if (CU->getOffset() != Offset) {
+ errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
+ exit(1);
+ }
+
+ OriginalOffsets.insert(Offset);
+ OffsetToIndexMap[Offset] = Index;
+ }
+
+ // Ignore old address table.
+ const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
+ // Move Data to the beginning of symbol table.
+ Data += SymbolTableOffset - CUTypesOffset;
+
+ // Calculate the size of the new address table.
+ uint32_t NewAddressTableSize = 0;
+ for (const auto &CURangesPair : ARangesSectionWriter.getCUAddressRanges()) {
+ const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
+ NewAddressTableSize += Ranges.size() * 20;
+ }
+
+ // Difference between old and new table (and section) sizes.
+ // Could be negative.
+ int32_t Delta = NewAddressTableSize - OldAddressTableSize;
+
+ size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;
+
+ // Free'd by ExecutableFileMemoryManager.
+ auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
+ uint8_t *Buffer = NewGdbIndexContents;
+
+ write32le(Buffer, Version);
+ write32le(Buffer + 4, CUListOffset);
+ write32le(Buffer + 8, CUTypesOffset);
+ write32le(Buffer + 12, AddressTableOffset);
+ write32le(Buffer + 16, SymbolTableOffset + Delta);
+ write32le(Buffer + 20, ConstantPoolOffset + Delta);
+ Buffer += 24;
+
+ using MapEntry = std::pair<uint32_t, CUInfo>;
+ std::vector<MapEntry> CUVector(CUMap.begin(), CUMap.end());
+ // Need to sort since we write out all of TUs in .debug_info before CUs.
+ std::sort(CUVector.begin(), CUVector.end(),
+ [](const MapEntry &E1, const MapEntry &E2) -> bool {
+ return E1.second.Offset < E2.second.Offset;
+ });
+ // Writing out CU List <Offset, Size>
+ for (auto &CUInfo : CUVector) {
+ // Skipping TU for DWARF5 when they are not included in CU list.
+ if (!OriginalOffsets.count(CUInfo.first))
+ continue;
+ write64le(Buffer, CUInfo.second.Offset);
+ // Length encoded in CU doesn't contain first 4 bytes that encode length.
+ write64le(Buffer + 8, CUInfo.second.Length + 4);
+ Buffer += 16;
+ }
+
+ // Rewrite TU CU List, since abbrevs can be different.
+ // Entry example:
+ // 0: offset = 0x00000000, type_offset = 0x0000001e, type_signature =
+ // 0x418503b8111e9a7b Spec says " triplet, the first value is the CU offset,
+ // the second value is the type offset in the CU, and the third value is the
+ // type signature" Looking at what is being generated by gdb-add-index. The
+ // first entry is TU offset, second entry is offset from it, and third entry
+ // is the type signature.
+ if (TUListSize)
+ for (const GDBIndexTUEntry &Entry : getGDBIndexTUEntryVector()) {
+ write64le(Buffer, Entry.UnitOffset);
+ write64le(Buffer + 8, Entry.TypeDIERelativeOffset);
+ write64le(Buffer + 16, Entry.TypeHash);
+ Buffer += sizeof(GDBIndexTUEntry);
+ }
+
+ // Generate new address table.
+ for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
+ ARangesSectionWriter.getCUAddressRanges()) {
+ const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
+ const DebugAddressRangesVector &Ranges = CURangesPair.second;
+ for (const DebugAddressRange &Range : Ranges) {
+ write64le(Buffer, Range.LowPC);
+ write64le(Buffer + 8, Range.HighPC);
+ write32le(Buffer + 16, CUIndex);
+ Buffer += 20;
+ }
+ }
+
+ const size_t TrailingSize =
+ GdbIndexContents.data() + GdbIndexContents.size() - Data;
+ assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
+ "size calculation error");
+
+ // Copy over the rest of the original data.
+ memcpy(Buffer, Data, TrailingSize);
+
+ // Register the new section.
+ BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
+ NewGdbIndexSize);
+}
diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
index 35e29b9..36fcd8f 100644
--- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
@@ -43,7 +43,6 @@ add_clang_library(clangTidyMiscModule
UseAnonymousNamespaceCheck.cpp
LINK_LIBS
- clangAnalysis
clangTidy
clangTidyUtils
diff --git a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp
index bbc1b47..bf7a847 100644
--- a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp
@@ -96,9 +96,14 @@ AST_MATCHER(QualType, isIntegralType) {
AST_MATCHER_P(UserDefinedLiteral, hasLiteral,
clang::ast_matchers::internal::Matcher<Expr>, InnerMatcher) {
- if (const Expr *CookedLiteral = Node.getCookedLiteral()) {
+ const UserDefinedLiteral::LiteralOperatorKind LOK =
+ Node.getLiteralOperatorKind();
+ if (LOK == UserDefinedLiteral::LOK_Template ||
+ LOK == UserDefinedLiteral::LOK_Raw)
+ return false;
+
+ if (const Expr *CookedLiteral = Node.getCookedLiteral())
return InnerMatcher.matches(*CookedLiteral, Finder, Builder);
- }
return false;
}
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp
index 015347e..601ff44 100644
--- a/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp
@@ -41,25 +41,35 @@ void RedundantMemberInitCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
void RedundantMemberInitCheck::registerMatchers(MatchFinder *Finder) {
auto ConstructorMatcher =
- cxxConstructExpr(argumentCountIs(0),
- hasDeclaration(cxxConstructorDecl(ofClass(cxxRecordDecl(
- unless(isTriviallyDefaultConstructible()))))))
+ cxxConstructExpr(
+ argumentCountIs(0),
+ hasDeclaration(cxxConstructorDecl(
+ ofClass(cxxRecordDecl(unless(isTriviallyDefaultConstructible()))
+ .bind("class")))))
.bind("construct");
+ auto HasUnionAsParent = hasParent(recordDecl(isUnion()));
+
+ auto HasTypeEqualToConstructorClass = hasType(qualType(
+ hasCanonicalType(qualType(hasDeclaration(equalsBoundNode("class"))))));
+
Finder->addMatcher(
cxxConstructorDecl(
unless(isDelegatingConstructor()), ofClass(unless(isUnion())),
forEachConstructorInitializer(
- cxxCtorInitializer(withInitializer(ConstructorMatcher),
- unless(forField(fieldDecl(
- anyOf(hasType(isConstQualified()),
- hasParent(recordDecl(isUnion())))))))
+ cxxCtorInitializer(
+ withInitializer(ConstructorMatcher),
+ anyOf(isBaseInitializer(),
+ forField(fieldDecl(unless(hasType(isConstQualified())),
+ unless(HasUnionAsParent),
+ HasTypeEqualToConstructorClass))))
.bind("init")))
.bind("constructor"),
this);
Finder->addMatcher(fieldDecl(hasInClassInitializer(ConstructorMatcher),
- unless(hasParent(recordDecl(isUnion()))))
+ HasTypeEqualToConstructorClass,
+ unless(HasUnionAsParent))
.bind("field"),
this);
}
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 33b65ca..661b2b1 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -376,6 +376,7 @@ Changes in existing checks
- Improved :doc:`readability-container-size-empty
<clang-tidy/checks/readability/container-size-empty>` check to prevent false
positives when utilizing ``size`` or ``length`` methods that accept parameter.
+ Fixed crash when facing template user defined literals.
- Improved :doc:`readability-duplicate-include
<clang-tidy/checks/readability/duplicate-include>` check by excluding include
@@ -403,6 +404,11 @@ Changes in existing checks
<clang-tidy/checks/readability/redundant-inline-specifier>` check to properly
emit warnings for static data member with an in-class initializer.
+- Improved :doc:`readability-redundant-member-init
+ <clang-tidy/checks/readability/redundant-member-init>` check to avoid
+ false-positives when type of the member does not match the type of the
+ initializer.
+
- Improved :doc:`readability-static-accessed-through-instance
<clang-tidy/checks/readability/static-accessed-through-instance>` check to
support calls to overloaded operators as base expression and provide fixes to
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp
index ecaf97f..4675527 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp
@@ -889,3 +889,9 @@ namespace PR88203 {
// CHECK-FIXES: {{^ }}if (s.empty()) {}{{$}}
}
}
+
+namespace PR94454 {
+ template <char...>
+ int operator""_ci() { return 0; }
+ auto eq = 0_ci == 0;
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-member-init.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-member-init.cpp
index 17b2714..6f18a60 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-member-init.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-member-init.cpp
@@ -302,3 +302,19 @@ struct D7 {
D7<int> d7i;
D7<S> d7s;
+
+struct SS {
+ SS() = default;
+ SS(S s) : s(s) {}
+
+ S s;
+};
+
+struct D8 {
+ SS ss = S();
+};
+
+struct D9 {
+ D9() : ss(S()) {}
+ SS ss;
+};
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 46f99d0..a49e412 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4016,6 +4016,30 @@ Note that the `size` argument must be a compile time constant.
Note that this intrinsic cannot yet be called in a ``constexpr`` context.
+``__is_bitwise_cloneable``
+--------------------------
+
+A type trait is used to check whether a type can be safely copied by memcpy.
+
+**Syntax**:
+
+.. code-block:: c++
+
+ bool __is_bitwise_cloneable(Type)
+
+**Description**:
+
+Objects of bitwise cloneable types can be bitwise copied by memcpy/memmove. The
+Clang compiler warrants that this behavior is well defined, and won't be
+broken by compiler optimizations and sanitizers.
+
+For implicit-lifetime types, the lifetime of the new object is implicitly
+started after the copy. For other types (e.g., classes with virtual methods),
+the lifetime isn't started, and using the object results in undefined behavior
+according to the C++ Standard.
+
+This builtin can be used in constant expressions.
+
Atomic Min/Max builtins with memory ordering
--------------------------------------------
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 69ac081..b9c9070 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -340,6 +340,9 @@ Non-comprehensive list of changes in this release
``-Winvalid-constexpr`` is not enabled for the function definition, which
should result in mild compile-time performance improvements.
+- Added ``__is_bitwise_cloneable`` which is used to check whether a type
+ can be safely copied by memcpy/memmove.
+
New Compiler Flags
------------------
- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and
diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index a4c82cd..ea1ffbc 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -867,7 +867,7 @@ public:
case OpenACCClauseKind::CLAUSE_NAME: \
Visit##CLAUSE_NAME##Clause(*cast<OpenACC##CLAUSE_NAME##Clause>(C)); \
return;
-#define CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME) \
+#define CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME, DEPRECATED) \
case OpenACCClauseKind::ALIAS_NAME: \
Visit##CLAUSE_NAME##Clause(*cast<OpenACC##CLAUSE_NAME##Clause>(C)); \
return;
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 263b632df..9eb3f6c 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1120,6 +1120,20 @@ public:
/// Return true if this is a trivially copyable type (C++0x [basic.types]p9)
bool isTriviallyCopyableType(const ASTContext &Context) const;
+ /// Return true if the type is safe to bitwise copy using memcpy/memmove.
+ ///
+ /// This is an extension in clang: bitwise cloneable types act as trivially
+ /// copyable types, meaning their underlying bytes can be safely copied by
+ /// memcpy or memmove. After the copy, the destination object has the same
+ /// object representation.
+ ///
+ /// However, there are cases where it is not safe to copy:
+ /// - When sanitizers, such as AddressSanitizer, add padding with poison,
+ /// which can cause issues if those poisoned padding bits are accessed.
+ /// - Types with Objective-C lifetimes, where specific runtime
+ /// semantics may not be preserved during a bitwise copy.
+ bool isBitwiseCloneableType(const ASTContext &Context) const;
+
/// Return true if this is a trivially copyable type
bool isTriviallyCopyConstructibleType(const ASTContext &Context) const;
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 433c779..9e6800e 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -240,7 +240,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "at
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3UiiUi", "t", "gfx940-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940-insts")
//===----------------------------------------------------------------------===//
// Deep learning builtins.
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index d15171d..0d5e38e 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -124,6 +124,7 @@ enum class CudaArch {
GFX1103,
GFX1150,
GFX1151,
+ GFX1152,
GFX12_GENERIC,
GFX1200,
GFX1201,
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 8774514..9f0b6f5 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10082,6 +10082,12 @@ def warn_new_dangling_initializer_list : Warning<
"the allocated initializer list}0 "
"will be destroyed at the end of the full-expression">,
InGroup<DanglingInitializerList>;
+def warn_unsupported_lifetime_extension : Warning<
+ "lifetime extension of "
+ "%select{temporary|backing array of initializer list}0 created "
+ "by aggregate initialization using a default member initializer "
+ "is not yet supported; lifetime of %select{temporary|backing array}0 "
+ "will end at the end of the full-expression">, InGroup<Dangling>;
// For non-floating point, expressions of the form x == x or x != x
// should result in a warning, since these always evaluate to a constant.
diff --git a/clang/include/clang/Basic/OpenACCClauses.def b/clang/include/clang/Basic/OpenACCClauses.def
index 53f4cd1..85f4859 100644
--- a/clang/include/clang/Basic/OpenACCClauses.def
+++ b/clang/include/clang/Basic/OpenACCClauses.def
@@ -15,31 +15,31 @@
//
// VISIT_CLAUSE(CLAUSE_NAME)
//
-// CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME)
+// CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME, DEPRECATED)
#ifndef CLAUSE_ALIAS
-#define CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME)
+#define CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME, false)
#endif
VISIT_CLAUSE(Auto)
VISIT_CLAUSE(Async)
VISIT_CLAUSE(Attach)
VISIT_CLAUSE(Copy)
-CLAUSE_ALIAS(PCopy, Copy)
-CLAUSE_ALIAS(PresentOrCopy, Copy)
+CLAUSE_ALIAS(PCopy, Copy, true)
+CLAUSE_ALIAS(PresentOrCopy, Copy, true)
VISIT_CLAUSE(CopyIn)
-CLAUSE_ALIAS(PCopyIn, CopyIn)
-CLAUSE_ALIAS(PresentOrCopyIn, CopyIn)
+CLAUSE_ALIAS(PCopyIn, CopyIn, true)
+CLAUSE_ALIAS(PresentOrCopyIn, CopyIn, true)
VISIT_CLAUSE(CopyOut)
-CLAUSE_ALIAS(PCopyOut, CopyOut)
-CLAUSE_ALIAS(PresentOrCopyOut, CopyOut)
+CLAUSE_ALIAS(PCopyOut, CopyOut, true)
+CLAUSE_ALIAS(PresentOrCopyOut, CopyOut, true)
VISIT_CLAUSE(Create)
-CLAUSE_ALIAS(PCreate, Create)
-CLAUSE_ALIAS(PresentOrCreate, Create)
+CLAUSE_ALIAS(PCreate, Create, true)
+CLAUSE_ALIAS(PresentOrCreate, Create, true)
VISIT_CLAUSE(Default)
VISIT_CLAUSE(DevicePtr)
VISIT_CLAUSE(DeviceType)
-CLAUSE_ALIAS(DType, DeviceType)
+CLAUSE_ALIAS(DType, DeviceType, false)
VISIT_CLAUSE(FirstPrivate)
VISIT_CLAUSE(If)
VISIT_CLAUSE(Independent)
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index b5a0e9d..9c4b174 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -542,6 +542,8 @@ TYPE_TRAIT_2(__reference_converts_from_temporary, ReferenceConvertsFromTemporary
// is not exposed to users.
TYPE_TRAIT_2(/*EmptySpellingName*/, IsDeducible, KEYCXX)
+TYPE_TRAIT_1(__is_bitwise_cloneable, IsBitwiseCloneable, KEYALL)
+
// Embarcadero Expression Traits
EXPRESSION_TRAIT(__is_lvalue_expr, IsLValueExpr, KEYCXX)
EXPRESSION_TRAIT(__is_rvalue_expr, IsRValueExpr, KEYCXX)
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index cca4367..a0820e2 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -2637,7 +2637,8 @@ let UnMaskedPolicyScheme = HasPassthruOperand in {
defm vbrev : RVVOutBuiltinSetZvbb;
defm vclz : RVVOutBuiltinSetZvbb;
defm vctz : RVVOutBuiltinSetZvbb;
- defm vcpopv : RVVOutBuiltinSetZvbb;
+ let IRName = "vcpopv", MaskedIRName = "vcpopv_mask" in
+ defm vcpop : RVVOutBuiltinSetZvbb;
let OverloadedName = "vwsll" in
defm vwsll : RVVSignedWidenBinBuiltinSetVwsll;
}
diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
index 2f8354d..0e11590 100644
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -17,7 +17,6 @@
#ifndef LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
#define LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
-#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/ArrayRef.h"
@@ -118,7 +117,7 @@ struct Directive {
bool scanSourceForDependencyDirectives(
StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
- const LangOptions &LangOpts, DiagnosticsEngine *Diags = nullptr,
+ DiagnosticsEngine *Diags = nullptr,
SourceLocation InputSourceLoc = SourceLocation());
/// Print the previously scanned dependency directives as minimized source text.
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index 9dc2006..f7b4510 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -363,8 +363,7 @@ public:
///
/// Returns true if the directive tokens are populated for this file entry,
/// false if not (i.e. this entry is not a file or its scan fails).
- bool ensureDirectiveTokensArePopulated(EntryRef Entry,
- const LangOptions &LangOpts);
+ bool ensureDirectiveTokensArePopulated(EntryRef Entry);
/// Check whether \p Path exists. By default checks cached result of \c
/// status(), and falls back on FS if unable to do so.
diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index 3faefb5..a5d3dac 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -87,6 +87,7 @@ add_clang_library(clangAST
Interp/Record.cpp
Interp/Source.cpp
Interp/State.cpp
+ Interp/MemberPointer.cpp
Interp/InterpShared.cpp
ItaniumCXXABI.cpp
ItaniumMangle.cpp
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 3671c41..d124248 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -100,6 +100,35 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
return this->emitMemcpy(CE);
}
+ case CK_DerivedToBaseMemberPointer: {
+ assert(classifyPrim(CE->getType()) == PT_MemberPtr);
+ assert(classifyPrim(SubExpr->getType()) == PT_MemberPtr);
+ const auto *FromMP = SubExpr->getType()->getAs<MemberPointerType>();
+ const auto *ToMP = CE->getType()->getAs<MemberPointerType>();
+
+ unsigned DerivedOffset = collectBaseOffset(QualType(ToMP->getClass(), 0),
+ QualType(FromMP->getClass(), 0));
+
+ if (!this->visit(SubExpr))
+ return false;
+
+ return this->emitGetMemberPtrBasePop(DerivedOffset, CE);
+ }
+
+ case CK_BaseToDerivedMemberPointer: {
+ assert(classifyPrim(CE) == PT_MemberPtr);
+ assert(classifyPrim(SubExpr) == PT_MemberPtr);
+ const auto *FromMP = SubExpr->getType()->getAs<MemberPointerType>();
+ const auto *ToMP = CE->getType()->getAs<MemberPointerType>();
+
+ unsigned DerivedOffset = collectBaseOffset(QualType(FromMP->getClass(), 0),
+ QualType(ToMP->getClass(), 0));
+
+ if (!this->visit(SubExpr))
+ return false;
+ return this->emitGetMemberPtrBasePop(-DerivedOffset, CE);
+ }
+
case CK_UncheckedDerivedToBase:
case CK_DerivedToBase: {
if (!this->visit(SubExpr))
@@ -187,7 +216,8 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
return this->emitCastFloatingIntegral(*ToT, CE);
}
- case CK_NullToPointer: {
+ case CK_NullToPointer:
+ case CK_NullToMemberPointer: {
if (DiscardResult)
return true;
@@ -326,7 +356,8 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
return this->emitCast(*FromT, *ToT, CE);
}
- case CK_PointerToBoolean: {
+ case CK_PointerToBoolean:
+ case CK_MemberPointerToBoolean: {
PrimType PtrT = classifyPrim(SubExpr->getType());
// Just emit p != nullptr for this.
@@ -534,8 +565,23 @@ bool ByteCodeExprGen<Emitter>::VisitBinaryOperator(const BinaryOperator *BO) {
BO->isComparisonOp())
return this->emitComplexComparison(LHS, RHS, BO);
- if (BO->isPtrMemOp())
- return this->visit(RHS);
+ if (BO->isPtrMemOp()) {
+ if (!this->visit(LHS))
+ return false;
+
+ if (!this->visit(RHS))
+ return false;
+
+ if (!this->emitToMemberPtr(BO))
+ return false;
+
+ if (classifyPrim(BO) == PT_MemberPtr)
+ return true;
+
+ if (!this->emitCastMemberPtrPtr(BO))
+ return false;
+ return DiscardResult ? this->emitPopPtr(BO) : true;
+ }
// Typecheck the args.
std::optional<PrimType> LT = classify(LHS->getType());
@@ -2773,6 +2819,8 @@ bool ByteCodeExprGen<Emitter>::visitZeroInitializer(PrimType T, QualType QT,
return this->emitNullPtr(nullptr, E);
case PT_FnPtr:
return this->emitNullFnPtr(nullptr, E);
+ case PT_MemberPtr:
+ return this->emitNullMemberPtr(nullptr, E);
case PT_Float: {
return this->emitConstFloat(APFloat::getZero(Ctx.getFloatSemantics(QT)), E);
}
@@ -2875,6 +2923,7 @@ bool ByteCodeExprGen<Emitter>::emitConst(T Value, PrimType Ty, const Expr *E) {
return this->emitConstBool(Value, E);
case PT_Ptr:
case PT_FnPtr:
+ case PT_MemberPtr:
case PT_Float:
case PT_IntAP:
case PT_IntAPS:
@@ -3188,7 +3237,7 @@ bool ByteCodeExprGen<Emitter>::visitAPValueInitializer(const APValue &Val,
const APValue &F = Val.getStructField(I);
const Record::Field *RF = R->getField(I);
- if (F.isInt()) {
+ if (F.isInt() || F.isLValue()) {
PrimType T = classifyPrim(RF->Decl->getType());
if (!this->visitAPValue(F, T, E))
return false;
@@ -3308,10 +3357,27 @@ bool ByteCodeExprGen<Emitter>::VisitCallExpr(const CallExpr *E) {
}
}
+ std::optional<unsigned> CalleeOffset;
// Add the (optional, implicit) This pointer.
if (const auto *MC = dyn_cast<CXXMemberCallExpr>(E)) {
- if (!this->visit(MC->getImplicitObjectArgument()))
+ if (!FuncDecl && classifyPrim(E->getCallee()) == PT_MemberPtr) {
+ // If we end up creating a CallPtr op for this, we need the base of the
+ // member pointer as the instance pointer, and later extract the function
+ // decl as the function pointer.
+ const Expr *Callee = E->getCallee();
+ CalleeOffset =
+ this->allocateLocalPrimitive(Callee, PT_MemberPtr, true, false);
+ if (!this->visit(Callee))
+ return false;
+ if (!this->emitSetLocal(PT_MemberPtr, *CalleeOffset, E))
+ return false;
+ if (!this->emitGetLocal(PT_MemberPtr, *CalleeOffset, E))
+ return false;
+ if (!this->emitGetMemberPtrBase(E))
+ return false;
+ } else if (!this->visit(MC->getImplicitObjectArgument())) {
return false;
+ }
}
llvm::BitVector NonNullArgs = collectNonNullArgs(FuncDecl, Args);
@@ -3380,11 +3446,22 @@ bool ByteCodeExprGen<Emitter>::VisitCallExpr(const CallExpr *E) {
for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
ArgSize += align(primSize(classify(E->getArg(I)).value_or(PT_Ptr)));
- if (!this->visit(E->getCallee()))
- return false;
+ // Get the callee, either from a member pointer saved in CalleeOffset,
+ // or by just visiting the Callee expr.
+ if (CalleeOffset) {
+ if (!this->emitGetLocal(PT_MemberPtr, *CalleeOffset, E))
+ return false;
+ if (!this->emitGetMemberPtrDecl(E))
+ return false;
+ if (!this->emitCallPtr(ArgSize, E, E))
+ return false;
+ } else {
+ if (!this->visit(E->getCallee()))
+ return false;
- if (!this->emitCallPtr(ArgSize, E, E))
- return false;
+ if (!this->emitCallPtr(ArgSize, E, E))
+ return false;
+ }
}
// Cleanup for discarded return values.
@@ -3623,6 +3700,11 @@ bool ByteCodeExprGen<Emitter>::VisitUnaryOperator(const UnaryOperator *E) {
return false;
return DiscardResult ? this->emitPop(*T, E) : true;
case UO_AddrOf: // &x
+ if (E->getType()->isMemberPointerType()) {
+ // C++11 [expr.unary.op]p3 has very strict rules on how the address of a
+ // member can be formed.
+ return this->emitGetMemberPtr(cast<DeclRefExpr>(SubExpr)->getDecl(), E);
+ }
// We should already have a pointer when we get here.
return this->delegate(SubExpr);
case UO_Deref: // *x
diff --git a/clang/lib/AST/Interp/Context.cpp b/clang/lib/AST/Interp/Context.cpp
index b0b22b0..98d1837 100644
--- a/clang/lib/AST/Interp/Context.cpp
+++ b/clang/lib/AST/Interp/Context.cpp
@@ -163,8 +163,12 @@ std::optional<PrimType> Context::classify(QualType T) const {
if (T->isFloatingType())
return PT_Float;
+ if (T->isSpecificBuiltinType(BuiltinType::BoundMember) ||
+ T->isMemberPointerType())
+ return PT_MemberPtr;
+
if (T->isFunctionPointerType() || T->isFunctionReferenceType() ||
- T->isFunctionType() || T->isSpecificBuiltinType(BuiltinType::BoundMember))
+ T->isFunctionType())
return PT_FnPtr;
if (T->isReferenceType() || T->isPointerType() ||
@@ -177,9 +181,6 @@ std::optional<PrimType> Context::classify(QualType T) const {
if (const auto *DT = dyn_cast<DecltypeType>(T))
return classify(DT->getUnderlyingType());
- if (const auto *DT = dyn_cast<MemberPointerType>(T))
- return classify(DT->getPointeeType());
-
return std::nullopt;
}
@@ -292,10 +293,12 @@ unsigned Context::collectBaseOffset(const RecordDecl *BaseDecl,
}
if (CurDecl == FinalDecl)
break;
-
- // break;
}
assert(OffsetSum > 0);
return OffsetSum;
}
+
+const Record *Context::getRecord(const RecordDecl *D) const {
+ return P->getOrCreateRecord(D);
+}
diff --git a/clang/lib/AST/Interp/Context.h b/clang/lib/AST/Interp/Context.h
index 360e949..c78dc9a 100644
--- a/clang/lib/AST/Interp/Context.h
+++ b/clang/lib/AST/Interp/Context.h
@@ -107,6 +107,8 @@ public:
unsigned collectBaseOffset(const RecordDecl *BaseDecl,
const RecordDecl *DerivedDecl) const;
+ const Record *getRecord(const RecordDecl *D) const;
+
private:
/// Runs a function.
bool Run(State &Parent, const Function *Func, APValue &Result);
diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp
index 746b765..d20ab13 100644
--- a/clang/lib/AST/Interp/Descriptor.cpp
+++ b/clang/lib/AST/Interp/Descriptor.cpp
@@ -11,6 +11,7 @@
#include "Floating.h"
#include "FunctionPointer.h"
#include "IntegralAP.h"
+#include "MemberPointer.h"
#include "Pointer.h"
#include "PrimType.h"
#include "Record.h"
diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp
index 3f8a92e..0ab84d1 100644
--- a/clang/lib/AST/Interp/Disasm.cpp
+++ b/clang/lib/AST/Interp/Disasm.cpp
@@ -19,6 +19,7 @@
#include "Integral.h"
#include "IntegralAP.h"
#include "InterpFrame.h"
+#include "MemberPointer.h"
#include "Opcode.h"
#include "PrimType.h"
#include "Program.h"
@@ -122,6 +123,8 @@ static const char *primTypeToString(PrimType T) {
return "Ptr";
case PT_FnPtr:
return "FnPtr";
+ case PT_MemberPtr:
+ return "MemberPtr";
}
llvm_unreachable("Unhandled PrimType");
}
diff --git a/clang/lib/AST/Interp/Function.cpp b/clang/lib/AST/Interp/Function.cpp
index 1d04998..00f5a1f 100644
--- a/clang/lib/AST/Interp/Function.cpp
+++ b/clang/lib/AST/Interp/Function.cpp
@@ -40,7 +40,8 @@ SourceInfo Function::getSource(CodePtr PC) const {
unsigned Offset = PC - getCodeBegin();
using Elem = std::pair<unsigned, SourceInfo>;
auto It = llvm::lower_bound(SrcMap, Elem{Offset, {}}, llvm::less_first());
- assert(It != SrcMap.end());
+ if (It == SrcMap.end())
+ return SrcMap.back().second;
return It->second;
}
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 145fa65..49015b1 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -373,6 +373,26 @@ bool CheckSubobject(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
return false;
}
+bool CheckDowncast(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
+ uint32_t Offset) {
+ uint32_t MinOffset = Ptr.getDeclDesc()->getMetadataSize();
+ uint32_t PtrOffset = Ptr.getByteOffset();
+
+ // We subtract Offset from PtrOffset. The result must be at least
+ // MinOffset.
+ if (Offset < PtrOffset && (PtrOffset - Offset) >= MinOffset)
+ return true;
+
+ const auto *E = cast<CastExpr>(S.Current->getExpr(OpPC));
+ QualType TargetQT = E->getType()->getPointeeType();
+ QualType MostDerivedQT = Ptr.getDeclPtr().getType();
+
+ S.CCEDiag(E, diag::note_constexpr_invalid_downcast)
+ << MostDerivedQT << TargetQT;
+
+ return false;
+}
+
bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
assert(Ptr.isLive() && "Pointer is not live");
if (!Ptr.isConst())
@@ -493,10 +513,12 @@ bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
bool CheckInvoke(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
if (!CheckLive(S, OpPC, Ptr, AK_MemberCall))
return false;
- if (!CheckExtern(S, OpPC, Ptr))
- return false;
- if (!CheckRange(S, OpPC, Ptr, AK_MemberCall))
- return false;
+ if (!Ptr.isDummy()) {
+ if (!CheckExtern(S, OpPC, Ptr))
+ return false;
+ if (!CheckRange(S, OpPC, Ptr, AK_MemberCall))
+ return false;
+ }
return true;
}
@@ -516,7 +538,7 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
return false;
}
- if (!F->isConstexpr()) {
+ if (!F->isConstexpr() || !F->hasBody()) {
const SourceLocation &Loc = S.Current->getLocation(OpPC);
if (S.getLangOpts().CPlusPlus11) {
const FunctionDecl *DiagDecl = F->getDecl();
@@ -550,9 +572,10 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
S.checkingPotentialConstantExpression())
return false;
- // If the declaration is defined _and_ declared 'constexpr', the below
- // diagnostic doesn't add anything useful.
- if (DiagDecl->isDefined() && DiagDecl->isConstexpr())
+ // If the declaration is defined, declared 'constexpr' _and_ has a body,
+ // the below diagnostic doesn't add anything useful.
+ if (DiagDecl->isDefined() && DiagDecl->isConstexpr() &&
+ DiagDecl->hasBody())
return false;
S.FFDiag(Loc, diag::note_constexpr_invalid_function, 1)
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index eca1792..98caea5 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -20,6 +20,7 @@
#include "InterpFrame.h"
#include "InterpStack.h"
#include "InterpState.h"
+#include "MemberPointer.h"
#include "Opcode.h"
#include "PrimType.h"
#include "Program.h"
@@ -75,6 +76,11 @@ bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
bool CheckSubobject(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
CheckSubobjectKind CSK);
+/// Checks if the downcast using the given offset is possible with the given
+/// pointer.
+bool CheckDowncast(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
+ uint32_t Offset);
+
/// Checks if a pointer points to const storage.
bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr);
@@ -725,6 +731,9 @@ using CompareFn = llvm::function_ref<bool(ComparisonCategoryResult)>;
template <typename T>
bool CmpHelper(InterpState &S, CodePtr OpPC, CompareFn Fn) {
+ assert((!std::is_same_v<T, MemberPointer>) &&
+ "Non-equality comparisons on member pointer types should already be "
+ "rejected in Sema.");
using BoolT = PrimConv<PT_Bool>::T;
const T &RHS = S.Stk.pop<T>();
const T &LHS = S.Stk.pop<T>();
@@ -834,6 +843,47 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
}
}
+template <>
+inline bool CmpHelperEQ<MemberPointer>(InterpState &S, CodePtr OpPC,
+ CompareFn Fn) {
+ const auto &RHS = S.Stk.pop<MemberPointer>();
+ const auto &LHS = S.Stk.pop<MemberPointer>();
+
+ // If either operand is a pointer to a weak function, the comparison is not
+ // constant.
+ for (const auto &MP : {LHS, RHS}) {
+ if (const CXXMethodDecl *MD = MP.getMemberFunction(); MD && MD->isWeak()) {
+ const SourceInfo &Loc = S.Current->getSource(OpPC);
+ S.FFDiag(Loc, diag::note_constexpr_mem_pointer_weak_comparison) << MD;
+ return false;
+ }
+ }
+
+ // C++11 [expr.eq]p2:
+ // If both operands are null, they compare equal. Otherwise if only one is
+ // null, they compare unequal.
+ if (LHS.isZero() && RHS.isZero()) {
+ S.Stk.push<Boolean>(Fn(ComparisonCategoryResult::Equal));
+ return true;
+ }
+ if (LHS.isZero() || RHS.isZero()) {
+ S.Stk.push<Boolean>(Fn(ComparisonCategoryResult::Unordered));
+ return true;
+ }
+
+ // We cannot compare against virtual declarations at compile time.
+ for (const auto &MP : {LHS, RHS}) {
+ if (const CXXMethodDecl *MD = MP.getMemberFunction();
+ MD && MD->isVirtual()) {
+ const SourceInfo &Loc = S.Current->getSource(OpPC);
+ S.CCEDiag(Loc, diag::note_constexpr_compare_virtual_mem_ptr) << MD;
+ }
+ }
+
+ S.Stk.push<Boolean>(Boolean::from(Fn(LHS.compare(RHS))));
+ return true;
+}
+
template <PrimType Name, class T = typename PrimConv<Name>::T>
bool EQ(InterpState &S, CodePtr OpPC) {
return CmpHelperEQ<T>(S, OpPC, [](ComparisonCategoryResult R) {
@@ -1300,6 +1350,9 @@ inline bool GetPtrDerivedPop(InterpState &S, CodePtr OpPC, uint32_t Off) {
return false;
if (!CheckSubobject(S, OpPC, Ptr, CSK_Derived))
return false;
+ if (!CheckDowncast(S, OpPC, Ptr, Off))
+ return false;
+
S.Stk.push<Pointer>(Ptr.atFieldSub(Off));
return true;
}
@@ -1324,6 +1377,12 @@ inline bool GetPtrBasePop(InterpState &S, CodePtr OpPC, uint32_t Off) {
return true;
}
+inline bool GetMemberPtrBasePop(InterpState &S, CodePtr OpPC, int32_t Off) {
+ const auto &Ptr = S.Stk.pop<MemberPointer>();
+ S.Stk.push<MemberPointer>(Ptr.atInstanceBase(Off));
+ return true;
+}
+
inline bool GetPtrThisBase(InterpState &S, CodePtr OpPC, uint32_t Off) {
if (S.checkingPotentialConstantExpression())
return false;
@@ -1532,6 +1591,24 @@ inline bool Memcpy(InterpState &S, CodePtr OpPC) {
return DoMemcpy(S, OpPC, Src, Dest);
}
+inline bool ToMemberPtr(InterpState &S, CodePtr OpPC) {
+ const auto &Member = S.Stk.pop<MemberPointer>();
+ const auto &Base = S.Stk.pop<Pointer>();
+
+ S.Stk.push<MemberPointer>(Member.takeInstance(Base));
+ return true;
+}
+
+inline bool CastMemberPtrPtr(InterpState &S, CodePtr OpPC) {
+ const auto &MP = S.Stk.pop<MemberPointer>();
+
+ if (std::optional<Pointer> Ptr = MP.toPointer(S.Ctx)) {
+ S.Stk.push<Pointer>(*Ptr);
+ return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// AddOffset, SubOffset
//===----------------------------------------------------------------------===//
@@ -1696,8 +1773,10 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) {
return true;
}
- T A = T::from(LHS.getIndex());
- T B = T::from(RHS.getIndex());
+ T A = LHS.isElementPastEnd() ? T::from(LHS.getNumElems())
+ : T::from(LHS.getIndex());
+ T B = RHS.isElementPastEnd() ? T::from(RHS.getNumElems())
+ : T::from(RHS.getIndex());
return AddSubMulHelper<T, T::sub, std::minus>(S, OpPC, A.bitWidth(), A, B);
}
@@ -2115,7 +2194,7 @@ inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
if (!CheckRange(S, OpPC, Ptr, CSK_ArrayToPointer))
return false;
- if (!Ptr.isUnknownSizeArray() || Ptr.isDummy()) {
+ if (Ptr.isRoot() || !Ptr.isUnknownSizeArray() || Ptr.isDummy()) {
S.Stk.push<Pointer>(Ptr.atIndex(0));
return true;
}
@@ -2329,6 +2408,28 @@ inline bool GetIntPtr(InterpState &S, CodePtr OpPC, const Descriptor *Desc) {
return true;
}
+inline bool GetMemberPtr(InterpState &S, CodePtr OpPC, const Decl *D) {
+ S.Stk.push<MemberPointer>(D);
+ return true;
+}
+
+inline bool GetMemberPtrBase(InterpState &S, CodePtr OpPC) {
+ const auto &MP = S.Stk.pop<MemberPointer>();
+
+ S.Stk.push<Pointer>(MP.getBase());
+ return true;
+}
+
+inline bool GetMemberPtrDecl(InterpState &S, CodePtr OpPC) {
+ const auto &MP = S.Stk.pop<MemberPointer>();
+
+ const auto *FD = cast<FunctionDecl>(MP.getDecl());
+ const auto *Func = S.getContext().getOrCreateFunction(FD);
+
+ S.Stk.push<FunctionPointer>(Func);
+ return true;
+}
+
/// Just emit a diagnostic. The expression that caused emission of this
/// op is not valid in a constant context.
inline bool Invalid(InterpState &S, CodePtr OpPC) {
diff --git a/clang/lib/AST/Interp/InterpFrame.cpp b/clang/lib/AST/Interp/InterpFrame.cpp
index 51b0bd5..54ccf90 100644
--- a/clang/lib/AST/Interp/InterpFrame.cpp
+++ b/clang/lib/AST/Interp/InterpFrame.cpp
@@ -12,6 +12,7 @@
#include "Function.h"
#include "InterpStack.h"
#include "InterpState.h"
+#include "MemberPointer.h"
#include "Pointer.h"
#include "PrimType.h"
#include "Program.h"
diff --git a/clang/lib/AST/Interp/InterpStack.cpp b/clang/lib/AST/Interp/InterpStack.cpp
index 91fe40f..c702474 100644
--- a/clang/lib/AST/Interp/InterpStack.cpp
+++ b/clang/lib/AST/Interp/InterpStack.cpp
@@ -10,6 +10,7 @@
#include "Boolean.h"
#include "Floating.h"
#include "Integral.h"
+#include "MemberPointer.h"
#include "Pointer.h"
#include <cassert>
#include <cstdlib>
diff --git a/clang/lib/AST/Interp/InterpStack.h b/clang/lib/AST/Interp/InterpStack.h
index 3fd0f63..9d85503 100644
--- a/clang/lib/AST/Interp/InterpStack.h
+++ b/clang/lib/AST/Interp/InterpStack.h
@@ -15,6 +15,7 @@
#include "FunctionPointer.h"
#include "IntegralAP.h"
+#include "MemberPointer.h"
#include "PrimType.h"
#include <memory>
#include <vector>
@@ -188,6 +189,8 @@ private:
return PT_IntAP;
else if constexpr (std::is_same_v<T, IntegralAP<false>>)
return PT_IntAP;
+ else if constexpr (std::is_same_v<T, MemberPointer>)
+ return PT_MemberPtr;
llvm_unreachable("unknown type push()'ed into InterpStack");
}
diff --git a/clang/lib/AST/Interp/MemberPointer.cpp b/clang/lib/AST/Interp/MemberPointer.cpp
new file mode 100644
index 0000000..96f6364
--- /dev/null
+++ b/clang/lib/AST/Interp/MemberPointer.cpp
@@ -0,0 +1,76 @@
+//===------------------------- MemberPointer.cpp ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MemberPointer.h"
+#include "Context.h"
+#include "FunctionPointer.h"
+#include "Program.h"
+#include "Record.h"
+
+namespace clang {
+namespace interp {
+
+std::optional<Pointer> MemberPointer::toPointer(const Context &Ctx) const {
+ if (!Dcl || isa<FunctionDecl>(Dcl))
+ return Base;
+ const FieldDecl *FD = cast<FieldDecl>(Dcl);
+ assert(FD);
+
+ if (!Base.isBlockPointer())
+ return std::nullopt;
+
+ Pointer CastedBase =
+ (PtrOffset < 0 ? Base.atField(-PtrOffset) : Base.atFieldSub(PtrOffset));
+
+ const Record *BaseRecord = CastedBase.getRecord();
+ if (!BaseRecord)
+ return std::nullopt;
+
+ assert(BaseRecord);
+ if (FD->getParent() == BaseRecord->getDecl())
+ return CastedBase.atField(BaseRecord->getField(FD)->Offset);
+
+ const RecordDecl *FieldParent = FD->getParent();
+ const Record *FieldRecord = Ctx.getRecord(FieldParent);
+
+ unsigned Offset = 0;
+ Offset += FieldRecord->getField(FD)->Offset;
+ Offset += CastedBase.block()->getDescriptor()->getMetadataSize();
+
+ if (Offset > CastedBase.block()->getSize())
+ return std::nullopt;
+
+ if (const RecordDecl *BaseDecl = Base.getDeclPtr().getRecord()->getDecl();
+ BaseDecl != FieldParent)
+ Offset += Ctx.collectBaseOffset(FieldParent, BaseDecl);
+
+ if (Offset > CastedBase.block()->getSize())
+ return std::nullopt;
+
+ assert(Offset <= CastedBase.block()->getSize());
+ return Pointer(const_cast<Block *>(Base.block()), Offset, Offset);
+}
+
+FunctionPointer MemberPointer::toFunctionPointer(const Context &Ctx) const {
+ return FunctionPointer(Ctx.getProgram().getFunction(cast<FunctionDecl>(Dcl)));
+}
+
+APValue MemberPointer::toAPValue() const {
+ if (isZero())
+ return APValue(static_cast<ValueDecl *>(nullptr), /*IsDerivedMember=*/false,
+ /*Path=*/{});
+
+ if (hasBase())
+ return Base.toAPValue();
+
+ return APValue(cast<ValueDecl>(getDecl()), /*IsDerivedMember=*/false,
+ /*Path=*/{});
+}
+
+} // namespace interp
+} // namespace clang
diff --git a/clang/lib/AST/Interp/MemberPointer.h b/clang/lib/AST/Interp/MemberPointer.h
new file mode 100644
index 0000000..5c61f6a
--- /dev/null
+++ b/clang/lib/AST/Interp/MemberPointer.h
@@ -0,0 +1,112 @@
+//===------------------------- MemberPointer.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_INTERP_MEMBER_POINTER_H
+#define LLVM_CLANG_AST_INTERP_MEMBER_POINTER_H
+
+#include "Pointer.h"
+#include <optional>
+
+namespace clang {
+class ASTContext;
+namespace interp {
+
+class Context;
+class FunctionPointer;
+
+class MemberPointer final {
+private:
+ Pointer Base;
+ const Decl *Dcl = nullptr;
+ int32_t PtrOffset = 0;
+
+ MemberPointer(Pointer Base, const Decl *Dcl, int32_t PtrOffset)
+ : Base(Base), Dcl(Dcl), PtrOffset(PtrOffset) {}
+
+public:
+ MemberPointer() = default;
+ MemberPointer(Pointer Base, const Decl *Dcl) : Base(Base), Dcl(Dcl) {}
+ MemberPointer(uint32_t Address, const Descriptor *D) {
+ // We only reach this for Address == 0, when creating a null member pointer.
+ assert(Address == 0);
+ }
+
+ MemberPointer(const Decl *D) : Dcl(D) {
+ assert((isa<FieldDecl, IndirectFieldDecl, CXXMethodDecl>(D)));
+ }
+
+ uint64_t getIntegerRepresentation() const {
+ assert(
+ false &&
+ "getIntegerRepresentation() shouldn't be reachable for MemberPointers");
+ return 17;
+ }
+
+ std::optional<Pointer> toPointer(const Context &Ctx) const;
+
+ FunctionPointer toFunctionPointer(const Context &Ctx) const;
+
+ Pointer getBase() const {
+ if (PtrOffset < 0)
+ return Base.atField(-PtrOffset);
+ return Base.atFieldSub(PtrOffset);
+ }
+ bool isMemberFunctionPointer() const {
+ return isa_and_nonnull<CXXMethodDecl>(Dcl);
+ }
+ const CXXMethodDecl *getMemberFunction() const {
+ return dyn_cast_if_present<CXXMethodDecl>(Dcl);
+ }
+ const FieldDecl *getField() const {
+ return dyn_cast_if_present<FieldDecl>(Dcl);
+ }
+
+ bool hasDecl() const { return Dcl; }
+ const Decl *getDecl() const { return Dcl; }
+
+ MemberPointer atInstanceBase(unsigned Offset) const {
+ if (Base.isZero())
+ return MemberPointer(Base, Dcl, Offset);
+ return MemberPointer(this->Base, Dcl, Offset + PtrOffset);
+ }
+
+ MemberPointer takeInstance(Pointer Instance) const {
+ assert(this->Base.isZero());
+ return MemberPointer(Instance, this->Dcl, this->PtrOffset);
+ }
+
+ APValue toAPValue() const;
+
+ bool isZero() const { return Base.isZero() && !Dcl; }
+ bool hasBase() const { return !Base.isZero(); }
+
+ void print(llvm::raw_ostream &OS) const {
+ OS << "MemberPtr(" << Base << " " << (void *)Dcl << " + " << PtrOffset
+ << ")";
+ }
+
+ std::string toDiagnosticString(const ASTContext &Ctx) const {
+ return "FIXME";
+ }
+
+ ComparisonCategoryResult compare(const MemberPointer &RHS) const {
+ if (this->Dcl == RHS.Dcl)
+ return ComparisonCategoryResult::Equal;
+ return ComparisonCategoryResult::Unordered;
+ }
+};
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, MemberPointer FP) {
+ FP.print(OS);
+ return OS;
+}
+
+} // namespace interp
+} // namespace clang
+
+#endif
diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td
index cfbd7f93..cb4f299 100644
--- a/clang/lib/AST/Interp/Opcodes.td
+++ b/clang/lib/AST/Interp/Opcodes.td
@@ -30,6 +30,7 @@ def IntAPS : Type;
def Float : Type;
def Ptr : Type;
def FnPtr : Type;
+def MemberPtr : Type;
//===----------------------------------------------------------------------===//
// Types transferred to the interpreter.
@@ -61,6 +62,7 @@ def ArgOffsetOfExpr : ArgType { let Name = "const OffsetOfExpr *"; }
def ArgDeclRef : ArgType { let Name = "const DeclRefExpr *"; }
def ArgDesc : ArgType { let Name = "const Descriptor *"; }
def ArgCCI : ArgType { let Name = "const ComparisonCategoryInfo *"; }
+def ArgDecl : ArgType { let Name = "const Decl*"; }
//===----------------------------------------------------------------------===//
// Classes of types instructions operate on.
@@ -93,7 +95,7 @@ def AluTypeClass : TypeClass {
}
def PtrTypeClass : TypeClass {
- let Types = [Ptr, FnPtr];
+ let Types = [Ptr, FnPtr, MemberPtr];
}
def BoolTypeClass : TypeClass {
@@ -208,7 +210,6 @@ def CallBI : Opcode {
def CallPtr : Opcode {
let Args = [ArgUint32, ArgCallExpr];
- let Types = [];
}
def CallVar : Opcode {
@@ -327,6 +328,11 @@ def GetPtrBasePop : Opcode {
// Offset of field, which is a base.
let Args = [ArgUint32];
}
+def GetMemberPtrBasePop : Opcode {
+ // Offset of field, which is a base.
+ let Args = [ArgSint32];
+}
+
def FinishInitPop : Opcode;
def FinishInit : Opcode;
@@ -751,6 +757,14 @@ def CheckNonNullArg : Opcode {
def Memcpy : Opcode;
+def ToMemberPtr : Opcode;
+def CastMemberPtrPtr : Opcode;
+def GetMemberPtr : Opcode {
+ let Args = [ArgDecl];
+}
+def GetMemberPtrBase : Opcode;
+def GetMemberPtrDecl : Opcode;
+
//===----------------------------------------------------------------------===//
// Debugging.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp
index 252f7ea..a60b4d2 100644
--- a/clang/lib/AST/Interp/Pointer.cpp
+++ b/clang/lib/AST/Interp/Pointer.cpp
@@ -13,6 +13,7 @@
#include "Function.h"
#include "Integral.h"
#include "InterpBlock.h"
+#include "MemberPointer.h"
#include "PrimType.h"
#include "Record.h"
diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h
index 93ca754..c6e4f4d 100644
--- a/clang/lib/AST/Interp/Pointer.h
+++ b/clang/lib/AST/Interp/Pointer.h
@@ -620,6 +620,7 @@ public:
private:
friend class Block;
friend class DeadBlock;
+ friend class MemberPointer;
friend struct InitMap;
Pointer(Block *Pointee, unsigned Base, uint64_t Offset);
diff --git a/clang/lib/AST/Interp/PrimType.cpp b/clang/lib/AST/Interp/PrimType.cpp
index 9b96dcf..3054e67 100644
--- a/clang/lib/AST/Interp/PrimType.cpp
+++ b/clang/lib/AST/Interp/PrimType.cpp
@@ -11,6 +11,7 @@
#include "Floating.h"
#include "FunctionPointer.h"
#include "IntegralAP.h"
+#include "MemberPointer.h"
#include "Pointer.h"
using namespace clang;
diff --git a/clang/lib/AST/Interp/PrimType.h b/clang/lib/AST/Interp/PrimType.h
index 604fb5d..20fb5e8 100644
--- a/clang/lib/AST/Interp/PrimType.h
+++ b/clang/lib/AST/Interp/PrimType.h
@@ -25,6 +25,7 @@ class Pointer;
class Boolean;
class Floating;
class FunctionPointer;
+class MemberPointer;
template <bool Signed> class IntegralAP;
template <unsigned Bits, bool Signed> class Integral;
@@ -44,10 +45,11 @@ enum PrimType : unsigned {
PT_Float = 11,
PT_Ptr = 12,
PT_FnPtr = 13,
+ PT_MemberPtr = 14,
};
inline constexpr bool isPtrType(PrimType T) {
- return T == PT_Ptr || T == PT_FnPtr;
+ return T == PT_Ptr || T == PT_FnPtr || T == PT_MemberPtr;
}
enum class CastKind : uint8_t {
@@ -91,6 +93,9 @@ template <> struct PrimConv<PT_Ptr> { using T = Pointer; };
template <> struct PrimConv<PT_FnPtr> {
using T = FunctionPointer;
};
+template <> struct PrimConv<PT_MemberPtr> {
+ using T = MemberPointer;
+};
/// Returns the size of a primitive type in bytes.
size_t primSize(PrimType Type);
@@ -131,6 +136,7 @@ static inline bool aligned(const void *P) {
TYPE_SWITCH_CASE(PT_Bool, B) \
TYPE_SWITCH_CASE(PT_Ptr, B) \
TYPE_SWITCH_CASE(PT_FnPtr, B) \
+ TYPE_SWITCH_CASE(PT_MemberPtr, B) \
} \
} while (0)
diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp
index 403ce9a..95089a9 100644
--- a/clang/lib/AST/OpenACCClause.cpp
+++ b/clang/lib/AST/OpenACCClause.cpp
@@ -104,7 +104,7 @@ OpenACCClause::child_range OpenACCClause::children() {
#define VISIT_CLAUSE(CLAUSE_NAME) \
case OpenACCClauseKind::CLAUSE_NAME: \
return cast<OpenACC##CLAUSE_NAME##Clause>(this)->children();
-#define CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME) \
+#define CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME, DEPRECATED) \
case OpenACCClauseKind::ALIAS_NAME: \
return cast<OpenACC##CLAUSE_NAME##Clause>(this)->children();
diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index 534793b..3d6a1cc 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -97,22 +97,6 @@ static void BuildParentMap(MapTy& M, Stmt* S,
BuildParentMap(M, SubStmt, OVMode);
}
break;
- case Stmt::CXXDefaultArgExprClass:
- if (auto *Arg = dyn_cast<CXXDefaultArgExpr>(S)) {
- if (Arg->hasRewrittenInit()) {
- M[Arg->getExpr()] = S;
- BuildParentMap(M, Arg->getExpr(), OVMode);
- }
- }
- break;
- case Stmt::CXXDefaultInitExprClass:
- if (auto *Init = dyn_cast<CXXDefaultInitExpr>(S)) {
- if (Init->hasRewrittenInit()) {
- M[Init->getExpr()] = S;
- BuildParentMap(M, Init->getExpr(), OVMode);
- }
- }
- break;
default:
for (Stmt *SubStmt : S->children()) {
if (SubStmt) {
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 8baccee..1076dcd 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -958,6 +958,9 @@ void TextNodeDumper::dumpTemplateArgument(const TemplateArgument &TA) {
}
OS << " '" << Str << "'";
+ if (!Context)
+ return;
+
if (TemplateArgument CanonTA = Context->getCanonicalTemplateArgument(TA);
!CanonTA.structurallyEquals(TA)) {
llvm::SmallString<128> CanonStr;
@@ -1139,15 +1142,17 @@ void TextNodeDumper::dumpTemplateName(TemplateName TN, StringRef Label) {
}
OS << " '" << Str << "'";
- if (TemplateName CanonTN = Context->getCanonicalTemplateName(TN);
- CanonTN != TN) {
- llvm::SmallString<128> CanonStr;
- {
- llvm::raw_svector_ostream SS(CanonStr);
- CanonTN.print(SS, PrintPolicy);
+ if (Context) {
+ if (TemplateName CanonTN = Context->getCanonicalTemplateName(TN);
+ CanonTN != TN) {
+ llvm::SmallString<128> CanonStr;
+ {
+ llvm::raw_svector_ostream SS(CanonStr);
+ CanonTN.print(SS, PrintPolicy);
+ }
+ if (CanonStr != Str)
+ OS << ":'" << CanonStr << "'";
}
- if (CanonStr != Str)
- OS << ":'" << CanonStr << "'";
}
}
dumpBareTemplateName(TN);
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 2097b29..33acae2 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2749,6 +2749,43 @@ bool QualType::isTriviallyCopyableType(const ASTContext &Context) const {
/*IsCopyConstructible=*/false);
}
+// FIXME: each call will trigger a full computation, cache the result.
+bool QualType::isBitwiseCloneableType(const ASTContext &Context) const {
+ auto CanonicalType = getCanonicalType();
+ if (CanonicalType.hasNonTrivialObjCLifetime())
+ return false;
+ if (CanonicalType->isArrayType())
+ return Context.getBaseElementType(CanonicalType)
+ .isBitwiseCloneableType(Context);
+
+ if (CanonicalType->isIncompleteType())
+ return false;
+ const auto *RD = CanonicalType->getAsRecordDecl(); // struct/union/class
+ if (!RD)
+ return true;
+
+ // Never allow memcpy when we're adding poisoned padding bits to the struct.
+ // Accessing these posioned bits will trigger false alarms on
+ // SanitizeAddressFieldPadding etc.
+ if (RD->mayInsertExtraPadding())
+ return false;
+
+ for (auto *const Field : RD->fields()) {
+ if (!Field->getType().isBitwiseCloneableType(Context))
+ return false;
+ }
+
+ if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ for (auto Base : CXXRD->bases())
+ if (!Base.getType().isBitwiseCloneableType(Context))
+ return false;
+ for (auto VBase : CXXRD->vbases())
+ if (!VBase.getType().isBitwiseCloneableType(Context))
+ return false;
+ }
+ return true;
+}
+
bool QualType::isTriviallyCopyConstructibleType(
const ASTContext &Context) const {
return isTriviallyCopyableTypeImpl(*this, Context,
@@ -4444,7 +4481,6 @@ static CachedProperties computeCachedProperties(const Type *T) {
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class,Base) case Type::Class:
#include "clang/AST/TypeNodes.inc"
// Treat instantiation-dependent types as external.
- if (!T->isInstantiationDependentType()) T->dump();
assert(T->isInstantiationDependentType());
return CachedProperties(Linkage::External, false);
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 0231725..64e6155 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -556,10 +556,6 @@ public:
private:
// Visitors to walk an AST and construct the CFG.
- CFGBlock *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *Default,
- AddStmtChoice asc);
- CFGBlock *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *Default,
- AddStmtChoice asc);
CFGBlock *VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc);
CFGBlock *VisitAddrLabelExpr(AddrLabelExpr *A, AddStmtChoice asc);
CFGBlock *VisitAttributedStmt(AttributedStmt *A, AddStmtChoice asc);
@@ -2258,10 +2254,16 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
asc, ExternallyDestructed);
case Stmt::CXXDefaultArgExprClass:
- return VisitCXXDefaultArgExpr(cast<CXXDefaultArgExpr>(S), asc);
-
case Stmt::CXXDefaultInitExprClass:
- return VisitCXXDefaultInitExpr(cast<CXXDefaultInitExpr>(S), asc);
+ // FIXME: The expression inside a CXXDefaultArgExpr is owned by the
+ // called function's declaration, not by the caller. If we simply add
+ // this expression to the CFG, we could end up with the same Expr
+ // appearing multiple times (PR13385).
+ //
+ // It's likewise possible for multiple CXXDefaultInitExprs for the same
+ // expression to be used in the same function (through aggregate
+ // initialization).
+ return VisitStmt(S, asc);
case Stmt::CXXBindTemporaryExprClass:
return VisitCXXBindTemporaryExpr(cast<CXXBindTemporaryExpr>(S), asc);
@@ -2431,40 +2433,6 @@ CFGBlock *CFGBuilder::VisitChildren(Stmt *S) {
return B;
}
-CFGBlock *CFGBuilder::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *Arg,
- AddStmtChoice asc) {
- if (Arg->hasRewrittenInit()) {
- if (asc.alwaysAdd(*this, Arg)) {
- autoCreateBlock();
- appendStmt(Block, Arg);
- }
- return VisitStmt(Arg->getExpr(), asc);
- }
-
- // We can't add the default argument if it's not rewritten because the
- // expression inside a CXXDefaultArgExpr is owned by the called function's
- // declaration, not by the caller, we could end up with the same expression
- // appearing multiple times.
- return VisitStmt(Arg, asc);
-}
-
-CFGBlock *CFGBuilder::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *Init,
- AddStmtChoice asc) {
- if (Init->hasRewrittenInit()) {
- if (asc.alwaysAdd(*this, Init)) {
- autoCreateBlock();
- appendStmt(Block, Init);
- }
- return VisitStmt(Init->getExpr(), asc);
- }
-
- // We can't add the default initializer if it's not rewritten because multiple
- // CXXDefaultInitExprs for the same sub-expression to be used in the same
- // function (through aggregate initialization). we could end up with the same
- // expression appearing multiple times.
- return VisitStmt(Init, asc);
-}
-
CFGBlock *CFGBuilder::VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc) {
if (asc.alwaysAdd(*this, ILE)) {
autoCreateBlock();
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index e2609b9..1d96a92 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -144,6 +144,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(1103), // gfx1103
GFX(1150), // gfx1150
GFX(1151), // gfx1151
+ GFX(1152), // gfx1152
{CudaArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
GFX(1200), // gfx1200
GFX(1201), // gfx1201
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index 6857284..5fc2234 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -133,7 +133,7 @@ public:
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
IntMaxType = Int64Type = SignedLong;
HasUnalignedAccess = true;
- resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n64-S128");
+ resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n32:64-S128");
// TODO: select appropriate ABI.
setABI("lp64d");
}
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index fc6ef11..ff7d2f1 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -228,6 +228,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX1103:
case CudaArch::GFX1150:
case CudaArch::GFX1151:
+ case CudaArch::GFX1152:
case CudaArch::GFX12_GENERIC:
case CudaArch::GFX1200:
case CudaArch::GFX1201:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 191bd75..6e9a1ba 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3537,6 +3537,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX1103:
case CudaArch::GFX1150:
case CudaArch::GFX1151:
+ case CudaArch::GFX1152:
case CudaArch::GFX12_GENERIC:
case CudaArch::GFX1200:
case CudaArch::GFX1201:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index d1ff8b4..057f6ef 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -120,7 +120,11 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- llvm_unreachable("AMDGPU does not support varargs");
+ const bool IsIndirect = false;
+ const bool AllowHigherAlign = false;
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(4), AllowHigherAlign);
}
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
index 08e711ca..6e56ee5 100644
--- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
+++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
@@ -31,7 +31,6 @@
using namespace clang;
using namespace clang::extractapi;
using namespace llvm;
-using namespace llvm::json;
namespace {
@@ -1036,9 +1035,9 @@ void SymbolGraphSerializer::serializeGraphToStream(
ExtendedModule &&EM) {
Object Root = serializeGraph(ModuleName, std::move(EM));
if (Options.Compact)
- OS << formatv("{0}", Value(std::move(Root))) << "\n";
+ OS << formatv("{0}", json::Value(std::move(Root))) << "\n";
else
- OS << formatv("{0:2}", Value(std::move(Root))) << "\n";
+ OS << formatv("{0:2}", json::Value(std::move(Root))) << "\n";
}
void SymbolGraphSerializer::serializeMainSymbolGraph(
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index d6061c2..eb96b54 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1181,10 +1181,10 @@ void UnwrappedLineParser::parsePPDefine() {
Line->InMacroBody = true;
if (Style.SkipMacroDefinitionBody) {
- do {
+ while (!eof()) {
FormatTok->Finalized = true;
- nextToken();
- } while (!eof());
+ FormatTok = Tokens->getNextToken();
+ }
addUnwrappedLine();
return;
}
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 1812b85..4f06432 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -1169,8 +1169,8 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
llvm::SmallVector<dependency_directives_scan::Token, 16> Tokens;
llvm::SmallVector<dependency_directives_scan::Directive, 32> Directives;
if (scanSourceForDependencyDirectives(
- FromFile.getBuffer(), Tokens, Directives, CI.getLangOpts(),
- &CI.getDiagnostics(), SM.getLocForStartOfFile(SM.getMainFileID()))) {
+ FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(),
+ SM.getLocForStartOfFile(SM.getMainFileID()))) {
assert(CI.getDiagnostics().hasErrorOccurred() &&
"no errors reported for failure");
diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp
index 5bc8385..a8d0294 100644
--- a/clang/lib/Interpreter/IncrementalParser.cpp
+++ b/clang/lib/Interpreter/IncrementalParser.cpp
@@ -413,7 +413,8 @@ void IncrementalParser::CleanUpPTU(PartialTranslationUnit &PTU) {
if (!ND)
continue;
// Check if we need to clean up the IdResolver chain.
- if (ND->getDeclName().getFETokenInfo())
+ if (ND->getDeclName().getFETokenInfo() && !D->getLangOpts().ObjC &&
+ !D->getLangOpts().CPlusPlus)
getCI()->getSema().IdResolver.RemoveDecl(ND);
}
}
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 683f87e..7a95278 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -42,6 +42,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Host.h"
+
+#include <cstdarg>
+
using namespace clang;
// FIXME: Figure out how to unify with namespace init_convenience from
@@ -270,14 +273,10 @@ Interpreter::~Interpreter() {
// can't find the precise resource directory in unittests so we have to hard
// code them.
const char *const Runtimes = R"(
+ #define __CLANG_REPL__ 1
#ifdef __cplusplus
+ #define EXTERN_C extern "C"
void *__clang_Interpreter_SetValueWithAlloc(void*, void*, void*);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*, void*);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*, float);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*, double);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*, long double);
- void __clang_Interpreter_SetValueNoAlloc(void*,void*,void*,unsigned long long);
struct __clang_Interpreter_NewTag{} __ci_newtag;
void* operator new(__SIZE_TYPE__, void* __p, __clang_Interpreter_NewTag) noexcept;
template <class T, class = T (*)() /*disable for arrays*/>
@@ -289,7 +288,11 @@ const char *const Runtimes = R"(
void __clang_Interpreter_SetValueCopyArr(const T (*Src)[N], void* Placement, unsigned long Size) {
__clang_Interpreter_SetValueCopyArr(Src[0], Placement, Size);
}
+#else
+ #define EXTERN_C extern
#endif // __cplusplus
+
+ EXTERN_C void __clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType, ...);
)";
llvm::Expected<std::unique_ptr<Interpreter>>
@@ -588,15 +591,17 @@ std::unique_ptr<RuntimeInterfaceBuilder> Interpreter::FindRuntimeInterface() {
if (!LookupInterface(ValuePrintingInfo[NoAlloc],
MagicRuntimeInterface[NoAlloc]))
return nullptr;
- if (!LookupInterface(ValuePrintingInfo[WithAlloc],
- MagicRuntimeInterface[WithAlloc]))
- return nullptr;
- if (!LookupInterface(ValuePrintingInfo[CopyArray],
- MagicRuntimeInterface[CopyArray]))
- return nullptr;
- if (!LookupInterface(ValuePrintingInfo[NewTag],
- MagicRuntimeInterface[NewTag]))
- return nullptr;
+ if (Ctx.getLangOpts().CPlusPlus) {
+ if (!LookupInterface(ValuePrintingInfo[WithAlloc],
+ MagicRuntimeInterface[WithAlloc]))
+ return nullptr;
+ if (!LookupInterface(ValuePrintingInfo[CopyArray],
+ MagicRuntimeInterface[CopyArray]))
+ return nullptr;
+ if (!LookupInterface(ValuePrintingInfo[NewTag],
+ MagicRuntimeInterface[NewTag]))
+ return nullptr;
+ }
return createInProcessRuntimeInterfaceBuilder(*this, Ctx, S);
}
@@ -855,69 +860,81 @@ __clang_Interpreter_SetValueWithAlloc(void *This, void *OutVal,
return VRef.getPtr();
}
-// Pointers, lvalue struct that can take as a reference.
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- void *Val) {
+extern "C" void REPL_EXTERNAL_VISIBILITY __clang_Interpreter_SetValueNoAlloc(
+ void *This, void *OutVal, void *OpaqueType, ...) {
Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- VRef.setPtr(Val);
-}
+ Interpreter *I = static_cast<Interpreter *>(This);
+ VRef = Value(I, OpaqueType);
+ if (VRef.isVoid())
+ return;
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal,
- void *OpaqueType) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
-}
+ va_list args;
+ va_start(args, /*last named param*/ OpaqueType);
-static void SetValueDataBasedOnQualType(Value &V, unsigned long long Data) {
- QualType QT = V.getType();
- if (const auto *ET = QT->getAs<EnumType>())
- QT = ET->getDecl()->getIntegerType();
-
- switch (QT->castAs<BuiltinType>()->getKind()) {
- default:
- llvm_unreachable("unknown type kind!");
-#define X(type, name) \
- case BuiltinType::name: \
- V.set##name(Data); \
- break;
- REPL_BUILTIN_TYPES
-#undef X
+ QualType QT = VRef.getType();
+ if (VRef.getKind() == Value::K_PtrOrObj) {
+ VRef.setPtr(va_arg(args, void *));
+ } else {
+ if (const auto *ET = QT->getAs<EnumType>())
+ QT = ET->getDecl()->getIntegerType();
+ switch (QT->castAs<BuiltinType>()->getKind()) {
+ default:
+ llvm_unreachable("unknown type kind!");
+ break;
+ // Types shorter than int are resolved as int, else va_arg has UB.
+ case BuiltinType::Bool:
+ VRef.setBool(va_arg(args, int));
+ break;
+ case BuiltinType::Char_S:
+ VRef.setChar_S(va_arg(args, int));
+ break;
+ case BuiltinType::SChar:
+ VRef.setSChar(va_arg(args, int));
+ break;
+ case BuiltinType::Char_U:
+ VRef.setChar_U(va_arg(args, unsigned));
+ break;
+ case BuiltinType::UChar:
+ VRef.setUChar(va_arg(args, unsigned));
+ break;
+ case BuiltinType::Short:
+ VRef.setShort(va_arg(args, int));
+ break;
+ case BuiltinType::UShort:
+ VRef.setUShort(va_arg(args, unsigned));
+ break;
+ case BuiltinType::Int:
+ VRef.setInt(va_arg(args, int));
+ break;
+ case BuiltinType::UInt:
+ VRef.setUInt(va_arg(args, unsigned));
+ break;
+ case BuiltinType::Long:
+ VRef.setLong(va_arg(args, long));
+ break;
+ case BuiltinType::ULong:
+ VRef.setULong(va_arg(args, unsigned long));
+ break;
+ case BuiltinType::LongLong:
+ VRef.setLongLong(va_arg(args, long long));
+ break;
+ case BuiltinType::ULongLong:
+ VRef.setULongLong(va_arg(args, unsigned long long));
+ break;
+ // Types shorter than double are resolved as double, else va_arg has UB.
+ case BuiltinType::Float:
+ VRef.setFloat(va_arg(args, double));
+ break;
+ case BuiltinType::Double:
+ VRef.setDouble(va_arg(args, double));
+ break;
+ case BuiltinType::LongDouble:
+ VRef.setLongDouble(va_arg(args, long double));
+ break;
+ // See REPL_BUILTIN_TYPES.
+ }
}
-}
-
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- unsigned long long Val) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- SetValueDataBasedOnQualType(VRef, Val);
-}
-
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- float Val) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- VRef.setFloat(Val);
-}
-
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- double Val) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- VRef.setDouble(Val);
-}
-
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- long double Val) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- VRef.setLongDouble(Val);
+ va_end(args);
}
// A trampoline to work around the fact that operator placement new cannot
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index fda54d3..0971daa 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -62,17 +62,14 @@ struct DirectiveWithTokens {
struct Scanner {
Scanner(StringRef Input,
SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
- DiagnosticsEngine *Diags, SourceLocation InputSourceLoc,
- const LangOptions &LangOpts)
+ DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
: Input(Input), Tokens(Tokens), Diags(Diags),
- InputSourceLoc(InputSourceLoc),
- LangOpts(getLangOptsForDepScanning(LangOpts)),
- TheLexer(InputSourceLoc, this->LangOpts, Input.begin(), Input.begin(),
+ InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
+ TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
Input.end()) {}
- static LangOptions
- getLangOptsForDepScanning(const LangOptions &invocationLangOpts) {
- LangOptions LangOpts(invocationLangOpts);
+ static LangOptions getLangOptsForDepScanning() {
+ LangOptions LangOpts;
// Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
LangOpts.ObjC = true;
LangOpts.LineComment = true;
@@ -703,7 +700,7 @@ bool Scanner::lex_Pragma(const char *&First, const char *const End) {
SmallVector<dependency_directives_scan::Token> DiscardTokens;
const char *Begin = Buffer.c_str();
Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
- InputSourceLoc, LangOptions()};
+ InputSourceLoc};
PragmaScanner.TheLexer.setParsingPreprocessorDirective(true);
if (PragmaScanner.lexPragma(Begin, Buffer.end()))
@@ -953,10 +950,9 @@ bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
bool clang::scanSourceForDependencyDirectives(
StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
- SmallVectorImpl<Directive> &Directives, const LangOptions &LangOpts,
- DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) {
- return Scanner(Input, Tokens, Diags, InputSourceLoc, LangOpts)
- .scan(Directives);
+ SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
+ SourceLocation InputSourceLoc) {
+ return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
}
void clang::printDependencyDirectivesAsSource(
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index c252032..16a5b74 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -571,11 +571,8 @@ StmtResult Parser::ParseExprStatement(ParsedStmtContext StmtCtx) {
}
Token *CurTok = nullptr;
- // If the semicolon is missing at the end of REPL input, consider if
- // we want to do value printing. Note this is only enabled in C++ mode
- // since part of the implementation requires C++ language features.
// Note we shouldn't eat the token since the callback needs it.
- if (Tok.is(tok::annot_repl_input_end) && Actions.getLangOpts().CPlusPlus)
+ if (Tok.is(tok::annot_repl_input_end))
CurTok = &Tok;
else
// Otherwise, eat the semicolon.
diff --git a/clang/lib/Sema/Scope.cpp b/clang/lib/Sema/Scope.cpp
index c08073e..5bc7e79 100644
--- a/clang/lib/Sema/Scope.cpp
+++ b/clang/lib/Sema/Scope.cpp
@@ -228,7 +228,11 @@ void Scope::dumpImpl(raw_ostream &OS) const {
{CompoundStmtScope, "CompoundStmtScope"},
{ClassInheritanceScope, "ClassInheritanceScope"},
{CatchScope, "CatchScope"},
+ {ConditionVarScope, "ConditionVarScope"},
+ {OpenMPOrderClauseScope, "OpenMPOrderClauseScope"},
+ {LambdaScope, "LambdaScope"},
{OpenACCComputeConstructScope, "OpenACCComputeConstructScope"},
+ {TypeAliasScope, "TypeAliasScope"},
{FriendScope, "FriendScope"},
};
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index c446cc1..d11bc9e 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -31,9 +31,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
constexpr const int SizeIdx = 2;
llvm::APSInt Size;
Expr *ArgExpr = TheCall->getArg(SizeIdx);
- ExprResult R = SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
- if (R.isInvalid())
- return true;
+ [[maybe_unused]] ExprResult R =
+ SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
+ assert(!R.isInvalid());
switch (Size.getSExtValue()) {
case 1:
case 2:
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index a6734ef..4b9b735 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -2288,7 +2288,8 @@ void Sema::ActOnPopScope(SourceLocation Loc, Scope *S) {
// Partial translation units that are created in incremental processing must
// not clean up the IdResolver because PTUs should take into account the
// declarations that came from previous PTUs.
- if (!PP.isIncrementalProcessingEnabled() || getLangOpts().ObjC)
+ if (!PP.isIncrementalProcessingEnabled() || getLangOpts().ObjC ||
+ getLangOpts().CPlusPlus)
IdResolver.RemoveDecl(D);
// Warn on it if we are shadowing a declaration.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index fb5ca19..76145f2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -5572,9 +5572,10 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
Res = Immediate.TransformInitializer(Param->getInit(),
/*NotCopy=*/false);
});
- if (Res.isUsable())
- Res = ConvertParamDefaultArgument(Param, Res.get(),
- Res.get()->getBeginLoc());
+ if (Res.isInvalid())
+ return ExprError();
+ Res = ConvertParamDefaultArgument(Param, Res.get(),
+ Res.get()->getBeginLoc());
if (Res.isInvalid())
return ExprError();
Init = Res.get();
@@ -5608,10 +5609,9 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
InitializationContext.emplace(Loc, Field, CurContext);
Expr *Init = nullptr;
- bool HasRewrittenInit = false;
bool NestedDefaultChecking = isCheckingDefaultArgumentOrInitializer();
- bool InLifetimeExtendingContext = isInLifetimeExtendingContext();
+
EnterExpressionEvaluationContext EvalContext(
*this, ExpressionEvaluationContext::PotentiallyEvaluated, Field);
@@ -5646,36 +5646,19 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
ImmediateCallVisitor V(getASTContext());
if (!NestedDefaultChecking)
V.TraverseDecl(Field);
-
- // CWG1815
- // Support lifetime extension of temporary created by aggregate
- // initialization using a default member initializer. We should always rebuild
- // the initializer if it contains any temporaries (if the initializer
- // expression is an ExprWithCleanups). Then make sure the normal lifetime
- // extension code recurses into the default initializer and does lifetime
- // extension when warranted.
- bool ContainsAnyTemporaries =
- isa_and_present<ExprWithCleanups>(Field->getInClassInitializer());
- if (V.HasImmediateCalls || InLifetimeExtendingContext ||
- ContainsAnyTemporaries) {
- HasRewrittenInit = true;
+ if (V.HasImmediateCalls) {
ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field,
CurContext};
ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer =
NestedDefaultChecking;
- // Pass down lifetime extending flag, and collect temporaries in
- // CreateMaterializeTemporaryExpr when we rewrite the call argument.
- keepInLifetimeExtendingContext();
+
EnsureImmediateInvocationInDefaultArgs Immediate(*this);
ExprResult Res;
-
- // Rebuild CXXDefaultInitExpr might cause diagnostics.
- SFINAETrap Trap(*this);
runWithSufficientStackSpace(Loc, [&] {
Res = Immediate.TransformInitializer(Field->getInClassInitializer(),
/*CXXDirectInit=*/false);
});
- if (Res.isUsable())
+ if (!Res.isInvalid())
Res = ConvertMemberDefaultInitExpression(Field, Res.get(), Loc);
if (Res.isInvalid()) {
Field->setInvalidDecl();
@@ -5702,7 +5685,7 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
return CXXDefaultInitExpr::Create(Context, InitializationContext->Loc,
Field, InitializationContext->Context,
- HasRewrittenInit ? Init : nullptr);
+ Init);
}
// DR1351:
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 4487c61..cf461a6 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1555,6 +1555,9 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
bool ListInitialization) {
QualType Ty = TInfo->getType();
SourceLocation TyBeginLoc = TInfo->getTypeLoc().getBeginLoc();
+
+ assert((!ListInitialization || Exprs.size() == 1) &&
+ "List initialization must have exactly one expression.");
SourceRange FullRange = SourceRange(TyBeginLoc, RParenOrBraceLoc);
InitializedEntity Entity =
@@ -5126,6 +5129,7 @@ static bool CheckUnaryTypeTraitTypeCompleteness(Sema &S, TypeTrait UTT,
case UTT_IsStandardLayout:
case UTT_IsPOD:
case UTT_IsLiteral:
+ case UTT_IsBitwiseCloneable:
// By analogy, is_trivially_relocatable and is_trivially_equality_comparable
// impose the same constraints.
case UTT_IsTriviallyRelocatable:
@@ -5619,6 +5623,8 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
return C.hasUniqueObjectRepresentations(T);
case UTT_IsTriviallyRelocatable:
return T.isTriviallyRelocatableType(C);
+ case UTT_IsBitwiseCloneable:
+ return T.isBitwiseCloneableType(C);
case UTT_IsReferenceable:
return T.isReferenceable();
case UTT_CanPassInRegs:
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 9ed3e8a..ed8b226 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -8063,6 +8063,11 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
enum PathLifetimeKind {
/// Lifetime-extend along this path.
Extend,
+ /// We should lifetime-extend, but we don't because (due to technical
+ /// limitations) we can't. This happens for default member initializers,
+ /// which we don't clone for every use, so we don't have a unique
+ /// MaterializeTemporaryExpr to update.
+ ShouldExtend,
/// Do not lifetime extend along this path.
NoExtend
};
@@ -8074,7 +8079,7 @@ shouldLifetimeExtendThroughPath(const IndirectLocalPath &Path) {
PathLifetimeKind Kind = PathLifetimeKind::Extend;
for (auto Elem : Path) {
if (Elem.Kind == IndirectLocalPathEntry::DefaultInit)
- Kind = PathLifetimeKind::Extend;
+ Kind = PathLifetimeKind::ShouldExtend;
else if (Elem.Kind != IndirectLocalPathEntry::LambdaCaptureInit)
return PathLifetimeKind::NoExtend;
}
@@ -8194,6 +8199,18 @@ void Sema::checkInitializerLifetime(const InitializedEntity &Entity,
ExtendingEntity->allocateManglingNumber());
// Also visit the temporaries lifetime-extended by this initializer.
return true;
+
+ case PathLifetimeKind::ShouldExtend:
+ // We're supposed to lifetime-extend the temporary along this path (per
+ // the resolution of DR1815), but we don't support that yet.
+ //
+ // FIXME: Properly handle this situation. Perhaps the easiest approach
+ // would be to clone the initializer expression on each use that would
+ // lifetime extend its temporaries.
+ Diag(DiagLoc, diag::warn_unsupported_lifetime_extension)
+ << RK << DiagRange;
+ break;
+
case PathLifetimeKind::NoExtend:
// If the path goes through the initialization of a variable or field,
// it can't possibly reach a temporary created in this full-expression.
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index cdb60d4..97586a0 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -424,615 +424,736 @@ bool checkValidAfterDeviceType(
S.Diag(DeviceTypeClause.getBeginLoc(), diag::note_acc_previous_clause_here);
return true;
}
-} // namespace
-SemaOpenACC::SemaOpenACC(Sema &S) : SemaBase(S) {}
+class SemaOpenACCClauseVisitor {
+ SemaOpenACC &SemaRef;
+ ASTContext &Ctx;
+ ArrayRef<const OpenACCClause *> ExistingClauses;
+ bool NotImplemented = false;
-SemaOpenACC::AssociatedStmtRAII::AssociatedStmtRAII(SemaOpenACC &S,
- OpenACCDirectiveKind DK)
- : SemaRef(S), WasInsideComputeConstruct(S.InsideComputeConstruct),
- DirKind(DK) {
- // Compute constructs end up taking their 'loop'.
- if (DirKind == OpenACCDirectiveKind::Parallel ||
- DirKind == OpenACCDirectiveKind::Serial ||
- DirKind == OpenACCDirectiveKind::Kernels) {
- SemaRef.InsideComputeConstruct = true;
- SemaRef.ParentlessLoopConstructs.swap(ParentlessLoopConstructs);
+ OpenACCClause *isNotImplemented() {
+ NotImplemented = true;
+ return nullptr;
}
-}
-SemaOpenACC::AssociatedStmtRAII::~AssociatedStmtRAII() {
- SemaRef.InsideComputeConstruct = WasInsideComputeConstruct;
- if (DirKind == OpenACCDirectiveKind::Parallel ||
- DirKind == OpenACCDirectiveKind::Serial ||
- DirKind == OpenACCDirectiveKind::Kernels) {
- assert(SemaRef.ParentlessLoopConstructs.empty() &&
- "Didn't consume loop construct list?");
- SemaRef.ParentlessLoopConstructs.swap(ParentlessLoopConstructs);
- }
-}
+public:
+ SemaOpenACCClauseVisitor(SemaOpenACC &S,
+ ArrayRef<const OpenACCClause *> ExistingClauses)
+ : SemaRef(S), Ctx(S.getASTContext()), ExistingClauses(ExistingClauses) {}
+ // Once we've implemented everything, we shouldn't need this infrastructure.
+ // But in the meantime, we use this to help decide whether the clause was
+ // handled for this directive.
+ bool diagNotImplemented() { return NotImplemented; }
+
+ OpenACCClause *Visit(SemaOpenACC::OpenACCParsedClause &Clause) {
+ switch (Clause.getClauseKind()) {
+ case OpenACCClauseKind::Gang:
+ case OpenACCClauseKind::Worker:
+ case OpenACCClauseKind::Vector: {
+ // TODO OpenACC: These are only implemented enough for the 'seq' diagnostic,
+ // otherwise treats itself as unimplemented. When we implement these, we
+ // can remove them from here.
-OpenACCClause *
-SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,
- OpenACCParsedClause &Clause) {
- if (Clause.getClauseKind() == OpenACCClauseKind::Invalid)
- return nullptr;
+ // OpenACC 3.3 2.9:
+ // A 'gang', 'worker', or 'vector' clause may not appear if a 'seq' clause
+ // appears.
+ const auto *Itr =
+ llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCSeqClause>);
- // Diagnose that we don't support this clause on this directive.
- if (!doesClauseApplyToDirective(Clause.getDirectiveKind(),
- Clause.getClauseKind())) {
- Diag(Clause.getBeginLoc(), diag::err_acc_clause_appertainment)
- << Clause.getDirectiveKind() << Clause.getClauseKind();
- return nullptr;
+ if (Itr != ExistingClauses.end()) {
+ SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine)
+ << Clause.getClauseKind() << (*Itr)->getClauseKind();
+ SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
+ }
+ return isNotImplemented();
}
- if (const auto *DevTypeClause =
- llvm::find_if(ExistingClauses,
- [&](const OpenACCClause *C) {
- return isa<OpenACCDeviceTypeClause>(C);
- });
- DevTypeClause != ExistingClauses.end()) {
- if (checkValidAfterDeviceType(
- *this, *cast<OpenACCDeviceTypeClause>(*DevTypeClause), Clause))
- return nullptr;
+#define VISIT_CLAUSE(CLAUSE_NAME) \
+ case OpenACCClauseKind::CLAUSE_NAME: \
+ return Visit##CLAUSE_NAME##Clause(Clause);
+#define CLAUSE_ALIAS(ALIAS, CLAUSE_NAME, DEPRECATED) \
+ case OpenACCClauseKind::ALIAS: \
+ if (DEPRECATED) \
+ SemaRef.Diag(Clause.getBeginLoc(), diag::warn_acc_deprecated_alias_name) \
+ << Clause.getClauseKind() << OpenACCClauseKind::CLAUSE_NAME; \
+ return Visit##CLAUSE_NAME##Clause(Clause);
+#include "clang/Basic/OpenACCClauses.def"
+ default:
+ return isNotImplemented();
+ }
+ llvm_unreachable("Invalid clause kind");
}
- switch (Clause.getClauseKind()) {
- case OpenACCClauseKind::Default: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+#define VISIT_CLAUSE(CLAUSE_NAME) \
+ OpenACCClause *Visit##CLAUSE_NAME##Clause( \
+ SemaOpenACC::OpenACCParsedClause &Clause);
+#include "clang/Basic/OpenACCClauses.def"
+};
- // Don't add an invalid clause to the AST.
- if (Clause.getDefaultClauseKind() == OpenACCDefaultClauseKind::Invalid)
- return nullptr;
-
- // OpenACC 3.3, Section 2.5.4:
- // At most one 'default' clause may appear, and it must have a value of
- // either 'none' or 'present'.
- // Second half of the sentence is diagnosed during parsing.
- if (checkAlreadyHasClauseOfKind(*this, ExistingClauses, Clause))
- return nullptr;
-
- return OpenACCDefaultClause::Create(
- getASTContext(), Clause.getDefaultClauseKind(), Clause.getBeginLoc(),
- Clause.getLParenLoc(), Clause.getEndLoc());
- }
+OpenACCClause *SemaOpenACCClauseVisitor::VisitDefaultClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // Don't add an invalid clause to the AST.
+ if (Clause.getDefaultClauseKind() == OpenACCDefaultClauseKind::Invalid)
+ return nullptr;
- case OpenACCClauseKind::If: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+ // OpenACC 3.3, Section 2.5.4:
+ // At most one 'default' clause may appear, and it must have a value of
+ // either 'none' or 'present'.
+ // Second half of the sentence is diagnosed during parsing.
+ if (checkAlreadyHasClauseOfKind(SemaRef, ExistingClauses, Clause))
+ return nullptr;
- // There is no prose in the standard that says duplicates aren't allowed,
- // but this diagnostic is present in other compilers, as well as makes
- // sense.
- if (checkAlreadyHasClauseOfKind(*this, ExistingClauses, Clause))
- return nullptr;
+ return OpenACCDefaultClause::Create(
+ Ctx, Clause.getDefaultClauseKind(), Clause.getBeginLoc(),
+ Clause.getLParenLoc(), Clause.getEndLoc());
+}
- // The parser has ensured that we have a proper condition expr, so there
- // isn't really much to do here.
+OpenACCClause *SemaOpenACCClauseVisitor::VisitIfClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // There is no prose in the standard that says duplicates aren't allowed,
+ // but this diagnostic is present in other compilers, as well as makes
+ // sense.
+ if (checkAlreadyHasClauseOfKind(SemaRef, ExistingClauses, Clause))
+ return nullptr;
- // If the 'if' clause is true, it makes the 'self' clause have no effect,
- // diagnose that here.
- // TODO OpenACC: When we add these two to other constructs, we might not
- // want to warn on this (for example, 'update').
- const auto *Itr =
- llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCSelfClause>);
- if (Itr != ExistingClauses.end()) {
- Diag(Clause.getBeginLoc(), diag::warn_acc_if_self_conflict);
- Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
- }
+ // The parser has ensured that we have a proper condition expr, so there
+ // isn't really much to do here.
- return OpenACCIfClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getConditionExpr(), Clause.getEndLoc());
+ // If the 'if' clause is true, it makes the 'self' clause have no effect,
+ // diagnose that here.
+ // TODO OpenACC: When we add these two to other constructs, we might not
+ // want to warn on this (for example, 'update').
+ const auto *Itr =
+ llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCSelfClause>);
+ if (Itr != ExistingClauses.end()) {
+ SemaRef.Diag(Clause.getBeginLoc(), diag::warn_acc_if_self_conflict);
+ SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
}
- case OpenACCClauseKind::Self: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
-
- // TODO OpenACC: When we implement this for 'update', this takes a
- // 'var-list' instead of a condition expression, so semantics/handling has
- // to happen differently here.
-
- // There is no prose in the standard that says duplicates aren't allowed,
- // but this diagnostic is present in other compilers, as well as makes
- // sense.
- if (checkAlreadyHasClauseOfKind(*this, ExistingClauses, Clause))
- return nullptr;
+ return OpenACCIfClause::Create(Ctx, Clause.getBeginLoc(),
+ Clause.getLParenLoc(),
+ Clause.getConditionExpr(), Clause.getEndLoc());
+}
- // If the 'if' clause is true, it makes the 'self' clause have no effect,
- // diagnose that here.
- // TODO OpenACC: When we add these two to other constructs, we might not
- // want to warn on this (for example, 'update').
- const auto *Itr =
- llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCIfClause>);
- if (Itr != ExistingClauses.end()) {
- Diag(Clause.getBeginLoc(), diag::warn_acc_if_self_conflict);
- Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
- }
+OpenACCClause *SemaOpenACCClauseVisitor::VisitSelfClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // TODO OpenACC: When we implement this for 'update', this takes a
+ // 'var-list' instead of a condition expression, so semantics/handling has
+ // to happen differently here.
+
+ // There is no prose in the standard that says duplicates aren't allowed,
+ // but this diagnostic is present in other compilers, as well as makes
+ // sense.
+ if (checkAlreadyHasClauseOfKind(SemaRef, ExistingClauses, Clause))
+ return nullptr;
- return OpenACCSelfClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getConditionExpr(), Clause.getEndLoc());
+ // If the 'if' clause is true, it makes the 'self' clause have no effect,
+ // diagnose that here.
+ // TODO OpenACC: When we add these two to other constructs, we might not
+ // want to warn on this (for example, 'update').
+ const auto *Itr =
+ llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCIfClause>);
+ if (Itr != ExistingClauses.end()) {
+ SemaRef.Diag(Clause.getBeginLoc(), diag::warn_acc_if_self_conflict);
+ SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
}
- case OpenACCClauseKind::NumGangs: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+ return OpenACCSelfClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(),
+ Clause.getConditionExpr(), Clause.getEndLoc());
+}
- // There is no prose in the standard that says duplicates aren't allowed,
- // but this diagnostic is present in other compilers, as well as makes
- // sense.
- if (checkAlreadyHasClauseOfKind(*this, ExistingClauses, Clause))
- return nullptr;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitNumGangsClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // There is no prose in the standard that says duplicates aren't allowed,
+ // but this diagnostic is present in other compilers, as well as makes
+ // sense.
+ if (checkAlreadyHasClauseOfKind(SemaRef, ExistingClauses, Clause))
+ return nullptr;
- if (Clause.getIntExprs().empty())
- Diag(Clause.getBeginLoc(), diag::err_acc_num_gangs_num_args)
- << /*NoArgs=*/0;
-
- unsigned MaxArgs =
- (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel ||
- Clause.getDirectiveKind() == OpenACCDirectiveKind::ParallelLoop)
- ? 3
- : 1;
- if (Clause.getIntExprs().size() > MaxArgs)
- Diag(Clause.getBeginLoc(), diag::err_acc_num_gangs_num_args)
- << /*NoArgs=*/1 << Clause.getDirectiveKind() << MaxArgs
+ // num_gangs requires at least 1 int expr in all forms. Diagnose here, but
+ // allow us to continue, an empty clause might be useful for future
+ // diagnostics.
+ if (Clause.getIntExprs().empty())
+ SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_num_gangs_num_args)
+ << /*NoArgs=*/0;
+
+ unsigned MaxArgs =
+ (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel ||
+ Clause.getDirectiveKind() == OpenACCDirectiveKind::ParallelLoop)
+ ? 3
+ : 1;
+ // The max number of args differs between parallel and other constructs.
+ // Again, allow us to continue for the purposes of future diagnostics.
+ if (Clause.getIntExprs().size() > MaxArgs)
+ SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_num_gangs_num_args)
+ << /*NoArgs=*/1 << Clause.getDirectiveKind() << MaxArgs
+ << Clause.getIntExprs().size();
+
+ // OpenACC 3.3 Section 2.5.4:
+ // A reduction clause may not appear on a parallel construct with a
+ // num_gangs clause that has more than one argument.
+ if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel &&
+ Clause.getIntExprs().size() > 1) {
+ auto *Parallel =
+ llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCReductionClause>);
+
+ if (Parallel != ExistingClauses.end()) {
+ SemaRef.Diag(Clause.getBeginLoc(),
+ diag::err_acc_reduction_num_gangs_conflict)
<< Clause.getIntExprs().size();
-
- // OpenACC 3.3 Section 2.5.4:
- // A reduction clause may not appear on a parallel construct with a
- // num_gangs clause that has more than one argument.
- if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel &&
- Clause.getIntExprs().size() > 1) {
- auto *Parallel =
- llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCReductionClause>);
-
- if (Parallel != ExistingClauses.end()) {
- Diag(Clause.getBeginLoc(), diag::err_acc_reduction_num_gangs_conflict)
- << Clause.getIntExprs().size();
- Diag((*Parallel)->getBeginLoc(), diag::note_acc_previous_clause_here);
- return nullptr;
- }
+ SemaRef.Diag((*Parallel)->getBeginLoc(),
+ diag::note_acc_previous_clause_here);
+ return nullptr;
}
-
- // Create the AST node for the clause even if the number of expressions is
- // incorrect.
- return OpenACCNumGangsClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getIntExprs(), Clause.getEndLoc());
- break;
}
- case OpenACCClauseKind::NumWorkers: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+ return OpenACCNumGangsClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getIntExprs(),
+ Clause.getEndLoc());
+}
- // There is no prose in the standard that says duplicates aren't allowed,
- // but this diagnostic is present in other compilers, as well as makes
- // sense.
- if (checkAlreadyHasClauseOfKind(*this, ExistingClauses, Clause))
- return nullptr;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitNumWorkersClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // There is no prose in the standard that says duplicates aren't allowed,
+ // but this diagnostic is present in other compilers, as well as makes
+ // sense.
+ if (checkAlreadyHasClauseOfKind(SemaRef, ExistingClauses, Clause))
+ return nullptr;
- assert(Clause.getIntExprs().size() == 1 &&
- "Invalid number of expressions for NumWorkers");
- return OpenACCNumWorkersClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getIntExprs()[0], Clause.getEndLoc());
- }
- case OpenACCClauseKind::VectorLength: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+ assert(Clause.getIntExprs().size() == 1 &&
+ "Invalid number of expressions for NumWorkers");
+ return OpenACCNumWorkersClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getIntExprs()[0],
+ Clause.getEndLoc());
+}
- // There is no prose in the standard that says duplicates aren't allowed,
- // but this diagnostic is present in other compilers, as well as makes
- // sense.
- if (checkAlreadyHasClauseOfKind(*this, ExistingClauses, Clause))
- return nullptr;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitVectorLengthClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // There is no prose in the standard that says duplicates aren't allowed,
+ // but this diagnostic is present in other compilers, as well as makes
+ // sense.
+ if (checkAlreadyHasClauseOfKind(SemaRef, ExistingClauses, Clause))
+ return nullptr;
- assert(Clause.getIntExprs().size() == 1 &&
- "Invalid number of expressions for VectorLength");
- return OpenACCVectorLengthClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getIntExprs()[0], Clause.getEndLoc());
- }
- case OpenACCClauseKind::Async: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+ assert(Clause.getIntExprs().size() == 1 &&
+        "Invalid number of expressions for VectorLength");
+ return OpenACCVectorLengthClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getIntExprs()[0],
+ Clause.getEndLoc());
+}
- // There is no prose in the standard that says duplicates aren't allowed,
- // but this diagnostic is present in other compilers, as well as makes
- // sense.
- if (checkAlreadyHasClauseOfKind(*this, ExistingClauses, Clause))
- return nullptr;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitAsyncClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // There is no prose in the standard that says duplicates aren't allowed,
+ // but this diagnostic is present in other compilers, as well as makes
+ // sense.
+ if (checkAlreadyHasClauseOfKind(SemaRef, ExistingClauses, Clause))
+ return nullptr;
- assert(Clause.getNumIntExprs() < 2 &&
- "Invalid number of expressions for Async");
+ assert(Clause.getNumIntExprs() < 2 &&
+ "Invalid number of expressions for Async");
+ return OpenACCAsyncClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(),
+ Clause.getNumIntExprs() != 0 ? Clause.getIntExprs()[0] : nullptr,
+ Clause.getEndLoc());
+}
- return OpenACCAsyncClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getNumIntExprs() != 0 ? Clause.getIntExprs()[0] : nullptr,
- Clause.getEndLoc());
- }
- case OpenACCClauseKind::Private: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitPrivateClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' and 'loop'
+ // constructs, and 'compute'/'loop' constructs are the only construct that
+ // can do anything with this yet, so skip/treat as unimplemented in this
+ // case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
+ Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
+ return isNotImplemented();
+
+ // ActOnVar ensured that everything is a valid variable reference, so there
+ // really isn't anything to do here. GCC does some duplicate-finding, though
+ // it isn't apparent in the standard where this is justified.
+
+ return OpenACCPrivateClause::Create(Ctx, Clause.getBeginLoc(),
+ Clause.getLParenLoc(),
+ Clause.getVarList(), Clause.getEndLoc());
+}
- // ActOnVar ensured that everything is a valid variable reference, so there
- // really isn't anything to do here. GCC does some duplicate-finding, though
- // it isn't apparent in the standard where this is justified.
+OpenACCClause *SemaOpenACCClauseVisitor::VisitFirstPrivateClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // ActOnVar ensured that everything is a valid variable reference, so there
+ // really isn't anything to do here. GCC does some duplicate-finding, though
+ // it isn't apparent in the standard where this is justified.
+
+ return OpenACCFirstPrivateClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getVarList(),
+ Clause.getEndLoc());
+}
- return OpenACCPrivateClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getVarList(), Clause.getEndLoc());
- }
- case OpenACCClauseKind::FirstPrivate: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitNoCreateClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+ // ActOnVar ensured that everything is a valid variable reference, so there
+ // really isn't anything to do here. GCC does some duplicate-finding, though
+ // it isn't apparent in the standard where this is justified.
+
+ return OpenACCNoCreateClause::Create(Ctx, Clause.getBeginLoc(),
+ Clause.getLParenLoc(),
+ Clause.getVarList(), Clause.getEndLoc());
+}
- // ActOnVar ensured that everything is a valid variable reference, so there
- // really isn't anything to do here. GCC does some duplicate-finding, though
- // it isn't apparent in the standard where this is justified.
+OpenACCClause *SemaOpenACCClauseVisitor::VisitPresentClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+ // ActOnVar ensured that everything is a valid variable reference, so there
+ // really isn't anything to do here. GCC does some duplicate-finding, though
+ // it isn't apparent in the standard where this is justified.
+
+ return OpenACCPresentClause::Create(Ctx, Clause.getBeginLoc(),
+ Clause.getLParenLoc(),
+ Clause.getVarList(), Clause.getEndLoc());
+}
- return OpenACCFirstPrivateClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getVarList(), Clause.getEndLoc());
- }
- case OpenACCClauseKind::NoCreate: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+ // ActOnVar ensured that everything is a valid variable reference, so there
+ // really isn't anything to do here. GCC does some duplicate-finding, though
+ // it isn't apparent in the standard where this is justified.
+
+ return OpenACCCopyClause::Create(
+ Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(),
+ Clause.getVarList(), Clause.getEndLoc());
+}
- // ActOnVar ensured that everything is a valid variable reference, so there
- // really isn't anything to do here. GCC does some duplicate-finding, though
- // it isn't apparent in the standard where this is justified.
+OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyInClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+ // ActOnVar ensured that everything is a valid variable reference, so there
+ // really isn't anything to do here. GCC does some duplicate-finding, though
+ // it isn't apparent in the standard where this is justified.
+
+ return OpenACCCopyInClause::Create(
+ Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(),
+ Clause.isReadOnly(), Clause.getVarList(), Clause.getEndLoc());
+}
- return OpenACCNoCreateClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getVarList(), Clause.getEndLoc());
- }
- case OpenACCClauseKind::Present: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyOutClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+ // ActOnVar ensured that everything is a valid variable reference, so there
+ // really isn't anything to do here. GCC does some duplicate-finding, though
+ // it isn't apparent in the standard where this is justified.
+
+ return OpenACCCopyOutClause::Create(
+ Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(),
+ Clause.isZero(), Clause.getVarList(), Clause.getEndLoc());
+}
- // ActOnVar ensured that everything is a valid variable reference, so there
- // really isn't anything to do here. GCC does some duplicate-finding, though
- // it isn't apparent in the standard where this is justified.
+OpenACCClause *SemaOpenACCClauseVisitor::VisitCreateClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+ // ActOnVar ensured that everything is a valid variable reference, so there
+ // really isn't anything to do here. GCC does some duplicate-finding, though
+ // it isn't apparent in the standard where this is justified.
+
+ return OpenACCCreateClause::Create(
+ Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(),
+ Clause.isZero(), Clause.getVarList(), Clause.getEndLoc());
+}
- return OpenACCPresentClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getVarList(), Clause.getEndLoc());
- }
- case OpenACCClauseKind::PresentOrCopy:
- case OpenACCClauseKind::PCopy:
- Diag(Clause.getBeginLoc(), diag::warn_acc_deprecated_alias_name)
- << Clause.getClauseKind() << OpenACCClauseKind::Copy;
- LLVM_FALLTHROUGH;
- case OpenACCClauseKind::Copy: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitAttachClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // ActOnVar ensured that everything is a valid variable reference, but we
+ // still have to make sure it is a pointer type.
+ llvm::SmallVector<Expr *> VarList{Clause.getVarList().begin(),
+ Clause.getVarList().end()};
+ VarList.erase(std::remove_if(VarList.begin(), VarList.end(),
+ [&](Expr *E) {
+ return SemaRef.CheckVarIsPointerType(
+ OpenACCClauseKind::Attach, E);
+ }),
+ VarList.end());
+ Clause.setVarListDetails(VarList,
+ /*IsReadOnly=*/false, /*IsZero=*/false);
+ return OpenACCAttachClause::Create(Ctx, Clause.getBeginLoc(),
+ Clause.getLParenLoc(), Clause.getVarList(),
+ Clause.getEndLoc());
+}
- // ActOnVar ensured that everything is a valid variable reference, so there
- // really isn't anything to do here. GCC does some duplicate-finding, though
- // it isn't apparent in the standard where this is justified.
+OpenACCClause *SemaOpenACCClauseVisitor::VisitDevicePtrClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // ActOnVar ensured that everything is a valid variable reference, but we
+ // still have to make sure it is a pointer type.
+ llvm::SmallVector<Expr *> VarList{Clause.getVarList().begin(),
+ Clause.getVarList().end()};
+ VarList.erase(std::remove_if(VarList.begin(), VarList.end(),
+ [&](Expr *E) {
+ return SemaRef.CheckVarIsPointerType(
+ OpenACCClauseKind::DevicePtr, E);
+ }),
+ VarList.end());
+ Clause.setVarListDetails(VarList,
+ /*IsReadOnly=*/false, /*IsZero=*/false);
+
+ return OpenACCDevicePtrClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getVarList(),
+ Clause.getEndLoc());
+}
- return OpenACCCopyClause::Create(
- getASTContext(), Clause.getClauseKind(), Clause.getBeginLoc(),
- Clause.getLParenLoc(), Clause.getVarList(), Clause.getEndLoc());
- }
- case OpenACCClauseKind::PresentOrCopyIn:
- case OpenACCClauseKind::PCopyIn:
- Diag(Clause.getBeginLoc(), diag::warn_acc_deprecated_alias_name)
- << Clause.getClauseKind() << OpenACCClauseKind::CopyIn;
- LLVM_FALLTHROUGH;
- case OpenACCClauseKind::CopyIn: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+OpenACCClause *SemaOpenACCClauseVisitor::VisitWaitClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ return OpenACCWaitClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getDevNumExpr(),
+ Clause.getQueuesLoc(), Clause.getQueueIdExprs(), Clause.getEndLoc());
+}
- // ActOnVar ensured that everything is a valid variable reference, so there
- // really isn't anything to do here. GCC does some duplicate-finding, though
- // it isn't apparent in the standard where this is justified.
+OpenACCClause *SemaOpenACCClauseVisitor::VisitDeviceTypeClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' and 'loop'
+ // constructs, and 'compute'/'loop' constructs are the only construct that
+ // can do anything with this yet, so skip/treat as unimplemented in this
+ // case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
+ Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
+ return isNotImplemented();
+
+ // TODO OpenACC: Once we get enough of the CodeGen implemented that we have
+ // a source for the list of valid architectures, we need to warn on unknown
+ // identifiers here.
+
+ return OpenACCDeviceTypeClause::Create(
+ Ctx, Clause.getClauseKind(), Clause.getBeginLoc(), Clause.getLParenLoc(),
+ Clause.getDeviceTypeArchitectures(), Clause.getEndLoc());
+}
- return OpenACCCopyInClause::Create(
- getASTContext(), Clause.getClauseKind(), Clause.getBeginLoc(),
- Clause.getLParenLoc(), Clause.isReadOnly(), Clause.getVarList(),
- Clause.getEndLoc());
+OpenACCClause *SemaOpenACCClauseVisitor::VisitAutoClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'loop' constructs, and it is
+ // the only construct that can do anything with this, so skip/treat as
+ // unimplemented for the combined constructs.
+ if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
+ return isNotImplemented();
+
+ // OpenACC 3.3 2.9:
+ // Only one of the seq, independent, and auto clauses may appear.
+ const auto *Itr =
+ llvm::find_if(ExistingClauses,
+ llvm::IsaPred<OpenACCIndependentClause, OpenACCSeqClause>);
+ if (Itr != ExistingClauses.end()) {
+ SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
+ << Clause.getClauseKind() << Clause.getDirectiveKind();
+ SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
+ return nullptr;
}
- case OpenACCClauseKind::PresentOrCopyOut:
- case OpenACCClauseKind::PCopyOut:
- Diag(Clause.getBeginLoc(), diag::warn_acc_deprecated_alias_name)
- << Clause.getClauseKind() << OpenACCClauseKind::CopyOut;
- LLVM_FALLTHROUGH;
- case OpenACCClauseKind::CopyOut: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
- // ActOnVar ensured that everything is a valid variable reference, so there
- // really isn't anything to do here. GCC does some duplicate-finding, though
- // it isn't apparent in the standard where this is justified.
+ return OpenACCAutoClause::Create(Ctx, Clause.getBeginLoc(),
+ Clause.getEndLoc());
+}
- return OpenACCCopyOutClause::Create(
- getASTContext(), Clause.getClauseKind(), Clause.getBeginLoc(),
- Clause.getLParenLoc(), Clause.isZero(), Clause.getVarList(),
- Clause.getEndLoc());
+OpenACCClause *SemaOpenACCClauseVisitor::VisitIndependentClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'loop' constructs, and it is
+ // the only construct that can do anything with this, so skip/treat as
+ // unimplemented for the combined constructs.
+ if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
+ return isNotImplemented();
+
+ // OpenACC 3.3 2.9:
+ // Only one of the seq, independent, and auto clauses may appear.
+ const auto *Itr = llvm::find_if(
+ ExistingClauses, llvm::IsaPred<OpenACCAutoClause, OpenACCSeqClause>);
+ if (Itr != ExistingClauses.end()) {
+ SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
+ << Clause.getClauseKind() << Clause.getDirectiveKind();
+ SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
+ return nullptr;
}
- case OpenACCClauseKind::PresentOrCreate:
- case OpenACCClauseKind::PCreate:
- Diag(Clause.getBeginLoc(), diag::warn_acc_deprecated_alias_name)
- << Clause.getClauseKind() << OpenACCClauseKind::Create;
- LLVM_FALLTHROUGH;
- case OpenACCClauseKind::Create: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
-
- // ActOnVar ensured that everything is a valid variable reference, so there
- // really isn't anything to do here. GCC does some duplicate-finding, though
- // it isn't apparent in the standard where this is justified.
- return OpenACCCreateClause::Create(getASTContext(), Clause.getClauseKind(),
- Clause.getBeginLoc(),
- Clause.getLParenLoc(), Clause.isZero(),
- Clause.getVarList(), Clause.getEndLoc());
- }
- case OpenACCClauseKind::Attach: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+ return OpenACCIndependentClause::Create(Ctx, Clause.getBeginLoc(),
+ Clause.getEndLoc());
+}
- // ActOnVar ensured that everything is a valid variable reference, but we
- // still have to make sure it is a pointer type.
- llvm::SmallVector<Expr *> VarList{Clause.getVarList().begin(),
- Clause.getVarList().end()};
- VarList.erase(std::remove_if(VarList.begin(), VarList.end(), [&](Expr *E) {
- return CheckVarIsPointerType(OpenACCClauseKind::Attach, E);
- }), VarList.end());
- Clause.setVarListDetails(VarList,
- /*IsReadOnly=*/false, /*IsZero=*/false);
-
- return OpenACCAttachClause::Create(getASTContext(), Clause.getBeginLoc(),
- Clause.getLParenLoc(),
- Clause.getVarList(), Clause.getEndLoc());
+OpenACCClause *SemaOpenACCClauseVisitor::VisitSeqClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'loop' constructs, and it is
+ // the only construct that can do anything with this, so skip/treat as
+ // unimplemented for the combined constructs.
+ if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
+ return isNotImplemented();
+
+ // OpenACC 3.3 2.9:
+ // Only one of the seq, independent, and auto clauses may appear.
+ const auto *Itr =
+ llvm::find_if(ExistingClauses,
+ llvm::IsaPred<OpenACCAutoClause, OpenACCIndependentClause>);
+ if (Itr != ExistingClauses.end()) {
+ SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
+ << Clause.getClauseKind() << Clause.getDirectiveKind();
+ SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
+ return nullptr;
}
- case OpenACCClauseKind::DevicePtr: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
- // ActOnVar ensured that everything is a valid variable reference, but we
- // still have to make sure it is a pointer type.
- llvm::SmallVector<Expr *> VarList{Clause.getVarList().begin(),
- Clause.getVarList().end()};
- VarList.erase(std::remove_if(VarList.begin(), VarList.end(), [&](Expr *E) {
- return CheckVarIsPointerType(OpenACCClauseKind::DevicePtr, E);
- }), VarList.end());
- Clause.setVarListDetails(VarList,
- /*IsReadOnly=*/false, /*IsZero=*/false);
-
- return OpenACCDevicePtrClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getVarList(), Clause.getEndLoc());
- }
- case OpenACCClauseKind::Wait: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
+ // OpenACC 3.3 2.9:
+ // A 'gang', 'worker', or 'vector' clause may not appear if a 'seq' clause
+ // appears.
+ Itr = llvm::find_if(ExistingClauses,
+ llvm::IsaPred<OpenACCGangClause, OpenACCWorkerClause,
+ OpenACCVectorClause>);
- return OpenACCWaitClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getDevNumExpr(), Clause.getQueuesLoc(), Clause.getQueueIdExprs(),
- Clause.getEndLoc());
+ if (Itr != ExistingClauses.end()) {
+ SemaRef.Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine)
+ << Clause.getClauseKind() << (*Itr)->getClauseKind();
+ SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
+ return nullptr;
}
- case OpenACCClauseKind::DType:
- case OpenACCClauseKind::DeviceType: {
- // Restrictions only properly implemented on 'compute' and 'loop'
- // constructs, and 'compute'/'loop' constructs are the only construct that
- // can do anything with this yet, so skip/treat as unimplemented in this
- // case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
- break;
- // TODO OpenACC: Once we get enough of the CodeGen implemented that we have
- // a source for the list of valid architectures, we need to warn on unknown
- // identifiers here.
-
- return OpenACCDeviceTypeClause::Create(
- getASTContext(), Clause.getClauseKind(), Clause.getBeginLoc(),
- Clause.getLParenLoc(), Clause.getDeviceTypeArchitectures(),
- Clause.getEndLoc());
- }
- case OpenACCClauseKind::Auto: {
- // Restrictions only properly implemented on 'loop' constructs, and it is
- // the only construct that can do anything with this, so skip/treat as
- // unimplemented for the combined constructs.
- if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
- break;
+ // TODO OpenACC: 2.9 ~ line 2010 specifies that the associated loop has some
+ // restrictions when there is a 'seq' clause in place. We probably need to
+ // implement that.
+ return OpenACCSeqClause::Create(Ctx, Clause.getBeginLoc(),
+ Clause.getEndLoc());
+}
- // OpenACC 3.3 2.9:
- // Only one of the seq, independent, and auto clauses may appear.
- const auto *Itr = llvm::find_if(
- ExistingClauses,
- llvm::IsaPred<OpenACCIndependentClause, OpenACCSeqClause>);
- if (Itr != ExistingClauses.end()) {
- Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
- << Clause.getClauseKind() << Clause.getDirectiveKind();
- Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
+OpenACCClause *SemaOpenACCClauseVisitor::VisitReductionClause(
+ SemaOpenACC::OpenACCParsedClause &Clause) {
+ // Restrictions only properly implemented on 'compute' constructs, and
+ // 'compute' constructs are the only construct that can do anything with
+ // this yet, so skip/treat as unimplemented in this case.
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+ return isNotImplemented();
+
+ // OpenACC 3.3 Section 2.5.4:
+ // A reduction clause may not appear on a parallel construct with a
+ // num_gangs clause that has more than one argument.
+ if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel) {
+ auto NumGangsClauses = llvm::make_filter_range(
+ ExistingClauses, llvm::IsaPred<OpenACCNumGangsClause>);
+
+ for (auto *NGC : NumGangsClauses) {
+ unsigned NumExprs =
+ cast<OpenACCNumGangsClause>(NGC)->getIntExprs().size();
+
+ if (NumExprs > 1) {
+ SemaRef.Diag(Clause.getBeginLoc(),
+ diag::err_acc_reduction_num_gangs_conflict)
+ << NumExprs;
+ SemaRef.Diag(NGC->getBeginLoc(), diag::note_acc_previous_clause_here);
+ return nullptr;
+ }
}
-
- return OpenACCAutoClause::Create(getASTContext(), Clause.getBeginLoc(),
- Clause.getEndLoc());
}
- case OpenACCClauseKind::Independent: {
- // Restrictions only properly implemented on 'loop' constructs, and it is
- // the only construct that can do anything with this, so skip/treat as
- // unimplemented for the combined constructs.
- if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
- break;
- // OpenACC 3.3 2.9:
- // Only one of the seq, independent, and auto clauses may appear.
- const auto *Itr = llvm::find_if(
- ExistingClauses, llvm::IsaPred<OpenACCAutoClause, OpenACCSeqClause>);
- if (Itr != ExistingClauses.end()) {
- Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
- << Clause.getClauseKind() << Clause.getDirectiveKind();
- Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
- }
+ SmallVector<Expr *> ValidVars;
+
+ for (Expr *Var : Clause.getVarList()) {
+ ExprResult Res = SemaRef.CheckReductionVar(Var);
- return OpenACCIndependentClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getEndLoc());
+ if (Res.isUsable())
+ ValidVars.push_back(Res.get());
}
- case OpenACCClauseKind::Seq: {
- // Restrictions only properly implemented on 'loop' constructs, and it is
- // the only construct that can do anything with this, so skip/treat as
- // unimplemented for the combined constructs.
- if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
- break;
- // OpenACC 3.3 2.9:
- // Only one of the seq, independent, and auto clauses may appear.
- const auto *Itr = llvm::find_if(
- ExistingClauses,
- llvm::IsaPred<OpenACCAutoClause, OpenACCIndependentClause>);
- if (Itr != ExistingClauses.end()) {
- Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
- << Clause.getClauseKind() << Clause.getDirectiveKind();
- Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
- }
+ return OpenACCReductionClause::Create(
+ Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getReductionOp(),
+ ValidVars, Clause.getEndLoc());
+}
- // OpenACC 3.3 2.9:
- // A 'gang', 'worker', or 'vector' clause may not appear if a 'seq' clause
- // appears.
- Itr = llvm::find_if(ExistingClauses,
- llvm::IsaPred<OpenACCGangClause, OpenACCWorkerClause,
- OpenACCVectorClause>);
+} // namespace
- if (Itr != ExistingClauses.end()) {
- Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine)
- << Clause.getClauseKind() << (*Itr)->getClauseKind();
- Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
- }
+SemaOpenACC::SemaOpenACC(Sema &S) : SemaBase(S) {}
- // TODO OpenACC: 2.9 ~ line 2010 specifies that the associated loop has some
- // restrictions when there is a 'seq' clause in place. We probably need to
- // implement that.
- return OpenACCSeqClause::Create(getASTContext(), Clause.getBeginLoc(),
- Clause.getEndLoc());
+SemaOpenACC::AssociatedStmtRAII::AssociatedStmtRAII(SemaOpenACC &S,
+ OpenACCDirectiveKind DK)
+ : SemaRef(S), WasInsideComputeConstruct(S.InsideComputeConstruct),
+ DirKind(DK) {
+ // Compute constructs end up taking their 'loop'.
+ if (DirKind == OpenACCDirectiveKind::Parallel ||
+ DirKind == OpenACCDirectiveKind::Serial ||
+ DirKind == OpenACCDirectiveKind::Kernels) {
+ SemaRef.InsideComputeConstruct = true;
+ SemaRef.ParentlessLoopConstructs.swap(ParentlessLoopConstructs);
}
- case OpenACCClauseKind::Gang:
- case OpenACCClauseKind::Worker:
- case OpenACCClauseKind::Vector: {
- // OpenACC 3.3 2.9:
- // A 'gang', 'worker', or 'vector' clause may not appear if a 'seq' clause
- // appears.
- const auto *Itr =
- llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCSeqClause>);
+}
- if (Itr != ExistingClauses.end()) {
- Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine)
- << Clause.getClauseKind() << (*Itr)->getClauseKind();
- Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
- }
- // Not yet implemented, so immediately drop to the 'not yet implemented'
- // diagnostic.
- break;
+SemaOpenACC::AssociatedStmtRAII::~AssociatedStmtRAII() {
+ SemaRef.InsideComputeConstruct = WasInsideComputeConstruct;
+ if (DirKind == OpenACCDirectiveKind::Parallel ||
+ DirKind == OpenACCDirectiveKind::Serial ||
+ DirKind == OpenACCDirectiveKind::Kernels) {
+ assert(SemaRef.ParentlessLoopConstructs.empty() &&
+ "Didn't consume loop construct list?");
+ SemaRef.ParentlessLoopConstructs.swap(ParentlessLoopConstructs);
}
- case OpenACCClauseKind::Reduction: {
- // Restrictions only properly implemented on 'compute' constructs, and
- // 'compute' constructs are the only construct that can do anything with
- // this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
- break;
-
- // OpenACC 3.3 Section 2.5.4:
- // A reduction clause may not appear on a parallel construct with a
- // num_gangs clause that has more than one argument.
- if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel) {
- auto NumGangsClauses = llvm::make_filter_range(
- ExistingClauses, llvm::IsaPred<OpenACCNumGangsClause>);
-
- for (auto *NGC : NumGangsClauses) {
- unsigned NumExprs =
- cast<OpenACCNumGangsClause>(NGC)->getIntExprs().size();
-
- if (NumExprs > 1) {
- Diag(Clause.getBeginLoc(), diag::err_acc_reduction_num_gangs_conflict)
- << NumExprs;
- Diag(NGC->getBeginLoc(), diag::note_acc_previous_clause_here);
- return nullptr;
- }
- }
- }
-
- SmallVector<Expr *> ValidVars;
-
- for (Expr *Var : Clause.getVarList()) {
- ExprResult Res = CheckReductionVar(Var);
+}
- if (Res.isUsable())
- ValidVars.push_back(Res.get());
- }
+OpenACCClause *
+SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,
+ OpenACCParsedClause &Clause) {
+ if (Clause.getClauseKind() == OpenACCClauseKind::Invalid)
+ return nullptr;
- return OpenACCReductionClause::Create(
- getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
- Clause.getReductionOp(), ValidVars, Clause.getEndLoc());
+ // Diagnose that we don't support this clause on this directive.
+ if (!doesClauseApplyToDirective(Clause.getDirectiveKind(),
+ Clause.getClauseKind())) {
+ Diag(Clause.getBeginLoc(), diag::err_acc_clause_appertainment)
+ << Clause.getDirectiveKind() << Clause.getClauseKind();
+ return nullptr;
}
- default:
- break;
+
+ if (const auto *DevTypeClause =
+ llvm::find_if(ExistingClauses,
+ [&](const OpenACCClause *C) {
+ return isa<OpenACCDeviceTypeClause>(C);
+ });
+ DevTypeClause != ExistingClauses.end()) {
+ if (checkValidAfterDeviceType(
+ *this, *cast<OpenACCDeviceTypeClause>(*DevTypeClause), Clause))
+ return nullptr;
}
- Diag(Clause.getBeginLoc(), diag::warn_acc_clause_unimplemented)
- << Clause.getClauseKind();
- return nullptr;
+ SemaOpenACCClauseVisitor Visitor{*this, ExistingClauses};
+ OpenACCClause *Result = Visitor.Visit(Clause);
+ assert((!Result || Result->getClauseKind() == Clause.getClauseKind()) &&
+ "Created wrong clause?");
+
+ if (Visitor.diagNotImplemented())
+ Diag(Clause.getBeginLoc(), diag::warn_acc_clause_unimplemented)
+ << Clause.getClauseKind();
+
+ return Result;
+
+ // switch (Clause.getClauseKind()) {
+ // case OpenACCClauseKind::PresentOrCopy:
+ // case OpenACCClauseKind::PCopy:
+ // Diag(Clause.getBeginLoc(), diag::warn_acc_deprecated_alias_name)
+ // << Clause.getClauseKind() << OpenACCClauseKind::Copy;
+ // LLVM_FALLTHROUGH;
+ // case OpenACCClauseKind::PresentOrCreate:
+ // case OpenACCClauseKind::PCreate:
+ // Diag(Clause.getBeginLoc(), diag::warn_acc_deprecated_alias_name)
+ // << Clause.getClauseKind() << OpenACCClauseKind::Create;
+ // LLVM_FALLTHROUGH;
+ //
+ //
+ //
+ //
+ // case OpenACCClauseKind::DType:
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ //
+ // case OpenACCClauseKind::Gang:
+ // case OpenACCClauseKind::Worker:
+ // case OpenACCClauseKind::Vector: {
+ // // OpenACC 3.3 2.9:
+ // // A 'gang', 'worker', or 'vector' clause may not appear if a 'seq'
+ // clause
+ // // appears.
+ // const auto *Itr =
+ // llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCSeqClause>);
+ //
+ // if (Itr != ExistingClauses.end()) {
+ // Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine)
+ // << Clause.getClauseKind() << (*Itr)->getClauseKind();
+ // Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
+ // }
+ // // Not yet implemented, so immediately drop to the 'not yet implemented'
+ // // diagnostic.
+ // break;
+ // }
+ // */
+
}
/// OpenACC 3.3 section 2.5.15:
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 95dd356..3bfda09 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -14172,13 +14172,6 @@ TreeTransform<Derived>::TransformCXXTemporaryObjectExpr(
if (TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgumentChanged))
return ExprError();
-
- if (E->isListInitialization() && !E->isStdInitListInitialization()) {
- ExprResult Res = RebuildInitList(E->getBeginLoc(), Args, E->getEndLoc());
- if (Res.isInvalid())
- return ExprError();
- Args = {Res.get()};
- }
}
if (!getDerived().AlwaysRebuild() &&
@@ -14190,9 +14183,12 @@ TreeTransform<Derived>::TransformCXXTemporaryObjectExpr(
return SemaRef.MaybeBindToTemporary(E);
}
+ // FIXME: We should just pass E->isListInitialization(), but we're not
+ // prepared to handle list-initialization without a child InitListExpr.
SourceLocation LParenLoc = T->getTypeLoc().getEndLoc();
return getDerived().RebuildCXXTemporaryObjectExpr(
- T, LParenLoc, Args, E->getEndLoc(), E->isListInitialization());
+ T, LParenLoc, Args, E->getEndLoc(),
+ /*ListInitialization=*/LParenLoc.isInvalid());
}
template<typename Derived>
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 290d966..197d673 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1971,45 +1971,33 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(PreVisit, Tmp, *currBldrCtx);
- bool HasRewrittenInit = false;
- const Expr *ArgE = nullptr;
- if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S)) {
+ const Expr *ArgE;
+ if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S))
ArgE = DefE->getExpr();
- HasRewrittenInit = DefE->hasRewrittenInit();
- } else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S)) {
+ else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S))
ArgE = DefE->getExpr();
- HasRewrittenInit = DefE->hasRewrittenInit();
- } else
+ else
llvm_unreachable("unknown constant wrapper kind");
- if (HasRewrittenInit) {
- for (auto *N : PreVisit) {
- ProgramStateRef state = N->getState();
- const LocationContext *LCtx = N->getLocationContext();
- state = state->BindExpr(S, LCtx, state->getSVal(ArgE, LCtx));
- Bldr2.generateNode(S, N, state);
- }
- } else {
- // If it's not rewritten, the contents of these expressions are not
- // actually part of the current function, so we fall back to constant
- // evaluation.
- bool IsTemporary = false;
- if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
- ArgE = MTE->getSubExpr();
- IsTemporary = true;
- }
-
- std::optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
- const LocationContext *LCtx = Pred->getLocationContext();
- for (auto *I : PreVisit) {
- ProgramStateRef State = I->getState();
- State = State->BindExpr(S, LCtx, ConstantVal.value_or(UnknownVal()));
- if (IsTemporary)
- State = createTemporaryRegionIfNeeded(State, LCtx, cast<Expr>(S),
- cast<Expr>(S));
+ bool IsTemporary = false;
+ if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
+ ArgE = MTE->getSubExpr();
+ IsTemporary = true;
+ }
- Bldr2.generateNode(S, I, State);
- }
+ std::optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
+ if (!ConstantVal)
+ ConstantVal = UnknownVal();
+
+ const LocationContext *LCtx = Pred->getLocationContext();
+ for (const auto I : PreVisit) {
+ ProgramStateRef State = I->getState();
+ State = State->BindExpr(S, LCtx, *ConstantVal);
+ if (IsTemporary)
+ State = createTemporaryRegionIfNeeded(State, LCtx,
+ cast<Expr>(S),
+ cast<Expr>(S));
+ Bldr2.generateNode(S, I, State);
}
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 66a2f6e..0cab17a 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -42,7 +42,7 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
}
bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
- EntryRef Ref, const LangOptions &LangOpts) {
+ EntryRef Ref) {
auto &Entry = Ref.Entry;
if (Entry.isError() || Entry.isDirectory())
@@ -66,7 +66,7 @@ bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
// dependencies.
if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
Contents->DepDirectiveTokens,
- Directives, LangOpts)) {
+ Directives)) {
Contents->DepDirectiveTokens.clear();
// FIXME: Propagate the diagnostic if desired by the client.
Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 07e1960d..0f82f22 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -364,12 +364,11 @@ public:
// Use the dependency scanning optimized file system if requested to do so.
if (DepFS)
ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
- [LocalDepFS = DepFS,
- &LangOpts = ScanInstance.getLangOpts()](FileEntryRef File)
+ [LocalDepFS = DepFS](FileEntryRef File)
-> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
if (llvm::ErrorOr<EntryRef> Entry =
LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
- if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry, LangOpts))
+ if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry))
return Entry->getDirectiveTokens();
return std::nullopt;
};
diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp
index dd5064d..6f6fca8 100644
--- a/clang/test/AST/Interp/arrays.cpp
+++ b/clang/test/AST/Interp/arrays.cpp
@@ -609,3 +609,17 @@ namespace ArrayMemberAccess {
bool cond = a->x;
}
}
+
+namespace OnePastEndSub {
+ struct A {};
+ constexpr A a[3][3];
+ constexpr int diff2 = &a[1][3] - &a[1][0]; /// Used to crash.
+}
+
+static int same_entity_2[3];
+constexpr int *get2() {
+ // This is a redeclaration of the same entity, even though it doesn't
+ // inherit the type of the prior declaration.
+ extern int same_entity_2[];
+ return same_entity_2;
+}
diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp
index c91d52c..1efd784 100644
--- a/clang/test/AST/Interp/cxx23.cpp
+++ b/clang/test/AST/Interp/cxx23.cpp
@@ -178,3 +178,25 @@ namespace ExplicitLambdaThis {
};
static_assert(f());
}
+
+namespace std {
+ struct strong_ordering {
+ int n;
+ constexpr operator int() const { return n; }
+ static const strong_ordering less, equal, greater;
+ };
+ constexpr strong_ordering strong_ordering::less = {-1};
+ constexpr strong_ordering strong_ordering::equal = {0};
+ constexpr strong_ordering strong_ordering::greater = {1};
+}
+
+namespace UndefinedThreeWay {
+ struct A {
+ friend constexpr std::strong_ordering operator<=>(const A&, const A&) = default; // all-note {{declared here}}
+ };
+
+ constexpr std::strong_ordering operator<=>(const A&, const A&) noexcept;
+ constexpr std::strong_ordering (*test_a_threeway)(const A&, const A&) = &operator<=>;
+ static_assert(!(*test_a_threeway)(A(), A())); // all-error {{static assertion expression is not an integral constant expression}} \
+ // all-note {{undefined function 'operator<=>' cannot be used in a constant expression}}
+}
diff --git a/clang/test/AST/Interp/eval-order.cpp b/clang/test/AST/Interp/eval-order.cpp
index aaf2b74..7a7ce6a 100644
--- a/clang/test/AST/Interp/eval-order.cpp
+++ b/clang/test/AST/Interp/eval-order.cpp
@@ -71,8 +71,8 @@ namespace EvalOrder {
// Rules 1 and 2 have no effect ('b' is not an expression).
// Rule 3: a->*b
- // SEQ(A(ud).*B(&UserDefined::n)); FIXME
- // SEQ(A(&ud)->*B(&UserDefined::n)); FIXME
+ SEQ(A(ud).*B(&UserDefined::n));
+ SEQ(A(&ud)->*B(&UserDefined::n));
// Rule 4: a(b1, b2, b3)
SEQ(A(f)(B(1), B(2), B(3))); // expected-error {{not an integral constant expression}} FIXME \
diff --git a/clang/test/AST/Interp/literals.cpp b/clang/test/AST/Interp/literals.cpp
index c160be0..5a29013 100644
--- a/clang/test/AST/Interp/literals.cpp
+++ b/clang/test/AST/Interp/literals.cpp
@@ -66,7 +66,12 @@ namespace ScalarTypes {
First = 0,
};
static_assert(getScalar<E>() == First, "");
- /// FIXME: Member pointers.
+
+ struct S {
+ int v;
+ };
+ constexpr int S::* MemberPtr = &S::v;
+ static_assert(getScalar<decltype(MemberPtr)>() == nullptr, "");
#if __cplusplus >= 201402L
constexpr void Void(int n) {
@@ -1204,7 +1209,7 @@ namespace incdecbool {
constexpr int externvar1() { // both-error {{never produces a constant expression}}
extern char arr[]; // ref-note {{declared here}}
return arr[0]; // ref-note {{read of non-constexpr variable 'arr'}} \
- // expected-note {{array-to-pointer decay of array member without known bound is not supported}}
+ // expected-note {{indexing of array without known bound}}
}
#endif
diff --git a/clang/test/AST/Interp/memberpointers.cpp b/clang/test/AST/Interp/memberpointers.cpp
new file mode 100644
index 0000000..54d73fe
--- /dev/null
+++ b/clang/test/AST/Interp/memberpointers.cpp
@@ -0,0 +1,197 @@
+// RUN: %clang_cc1 -std=c++14 -fexperimental-new-constant-interpreter -verify=expected,both %s
+// RUN: %clang_cc1 -std=c++14 -verify=ref,both %s
+
+namespace MemberPointers {
+ struct A {
+ constexpr A(int n) : n(n) {}
+ int n;
+ constexpr int f() const { return n + 3; }
+ };
+
+ constexpr A a(7);
+ static_assert(A(5).*&A::n == 5, "");
+ static_assert((&a)->*&A::n == 7, "");
+ static_assert((A(8).*&A::f)() == 11, "");
+ static_assert(((&a)->*&A::f)() == 10, "");
+
+ struct B : A {
+ constexpr B(int n, int m) : A(n), m(m) {}
+ int m;
+ constexpr int g() const { return n + m + 1; }
+ };
+ constexpr B b(9, 13);
+ static_assert(B(4, 11).*&A::n == 4, "");
+ static_assert(B(4, 11).*&B::m == 11, "");
+ static_assert(B(4, 11).m == 11, "");
+ static_assert(B(4, 11).*(int(A::*))&B::m == 11, "");
+ static_assert(B(4, 11).*&B::m == 11, "");
+ static_assert((&b)->*&A::n == 9, "");
+ static_assert((&b)->*&B::m == 13, "");
+ static_assert((&b)->*(int(A::*))&B::m == 13, "");
+ static_assert((B(4, 11).*&A::f)() == 7, "");
+ static_assert((B(4, 11).*&B::g)() == 16, "");
+
+ static_assert((B(4, 11).*(int(A::*)() const)&B::g)() == 16, "");
+
+ static_assert(((&b)->*&A::f)() == 12, "");
+ static_assert(((&b)->*&B::g)() == 23, "");
+ static_assert(((&b)->*(int(A::*)()const)&B::g)() == 23, "");
+
+
+ struct S {
+ constexpr S(int m, int n, int (S::*pf)() const, int S::*pn) :
+ m(m), n(n), pf(pf), pn(pn) {}
+ constexpr S() : m(), n(), pf(&S::f), pn(&S::n) {}
+
+ constexpr int f() const { return this->*pn; }
+ virtual int g() const;
+
+ int m, n;
+ int (S::*pf)() const;
+ int S::*pn;
+ };
+
+ constexpr int S::*pm = &S::m;
+ constexpr int S::*pn = &S::n;
+
+ constexpr int (S::*pf)() const = &S::f;
+ constexpr int (S::*pg)() const = &S::g;
+
+ constexpr S s(2, 5, &S::f, &S::m);
+
+ static_assert((s.*&S::f)() == 2, "");
+ static_assert((s.*s.pf)() == 2, "");
+
+ static_assert(pf == &S::f, "");
+
+ static_assert(pf == s.*&S::pf, "");
+
+ static_assert(pm == &S::m, "");
+ static_assert(pm != pn, "");
+ static_assert(s.pn != pn, "");
+ static_assert(s.pn == pm, "");
+ static_assert(pg != nullptr, "");
+ static_assert(pf != nullptr, "");
+ static_assert((int S::*)nullptr == nullptr, "");
+ static_assert(pg == pg, ""); // both-error {{constant expression}} \
+ // both-note {{comparison of pointer to virtual member function 'g' has unspecified value}}
+ static_assert(pf != pg, ""); // both-error {{constant expression}} \
+ // both-note {{comparison of pointer to virtual member function 'g' has unspecified value}}
+
+ template<int n> struct T : T<n-1> { const int X = n;};
+ template<> struct T<0> { int n; char k;};
+ template<> struct T<30> : T<29> { int m; };
+
+ T<17> t17;
+ T<30> t30;
+
+ constexpr int (T<15>::*deepm) = (int(T<10>::*))&T<30>::m;
+ constexpr int (T<10>::*deepn) = &T<0>::n;
+ constexpr char (T<10>::*deepk) = &T<0>::k;
+
+ static_assert(&(t17.*deepn) == &t17.n, "");
+ static_assert(&(t17.*deepk) == &t17.k, "");
+ static_assert(deepn == &T<2>::n, "");
+
+ constexpr int *pgood = &(t30.*deepm);
+ constexpr int *pbad = &(t17.*deepm); // both-error {{constant expression}}
+ static_assert(&(t30.*deepm) == &t30.m, "");
+
+ static_assert(deepm == &T<50>::m, "");
+ static_assert(deepm != deepn, "");
+
+ constexpr T<5> *p17_5 = &t17;
+ constexpr T<13> *p17_13 = (T<13>*)p17_5;
+ constexpr T<23> *p17_23 = (T<23>*)p17_13; // both-error {{constant expression}} \
+ // both-note {{cannot cast object of dynamic type 'T<17>' to type 'T<23>'}}
+ constexpr T<18> *p17_18 = (T<18>*)p17_13; // both-error {{constant expression}} \
+ // both-note {{cannot cast object of dynamic type 'T<17>' to type 'T<18>'}}
+ static_assert(&(p17_5->*(int(T<0>::*))deepn) == &t17.n, "");
+ static_assert(&(p17_5->*(int(T<0>::*))deepn), "");
+
+
+ static_assert(&(p17_13->*deepn) == &t17.n, "");
+ constexpr int *pbad2 = &(p17_13->*(int(T<9>::*))deepm); // both-error {{constant expression}}
+
+ constexpr T<5> *p30_5 = &t30;
+ constexpr T<23> *p30_23 = (T<23>*)p30_5;
+ constexpr T<13> *p30_13 = p30_23;
+ static_assert(&(p30_13->*deepn) == &t30.n, "");
+ static_assert(&(p30_23->*deepn) == &t30.n, "");
+ static_assert(&(p30_5->*(int(T<3>::*))deepn) == &t30.n, "");
+
+ static_assert(&(p30_5->*(int(T<2>::*))deepm) == &t30.m, "");
+ static_assert(&(((T<17>*)p30_13)->*deepm) == &t30.m, "");
+ static_assert(&(p30_23->*deepm) == &t30.m, "");
+
+
+ /// Added tests not from constant-expression-cxx11.cpp
+ static_assert(pm, "");
+ static_assert(!((int S::*)nullptr), "");
+ constexpr int S::*pk = nullptr;
+ static_assert(!pk, "");
+}
+
+namespace test3 {
+ struct nsCSSRect {
+ };
+ static int nsCSSRect::* sides;
+ nsCSSRect dimenX;
+ void ParseBoxCornerRadii(int y) {
+ switch (y) {
+ }
+ int& x = dimenX.*sides;
+ }
+}
+
+void foo() {
+ class X;
+ void (X::*d) ();
+ d = nullptr; /// This calls in the constant interpreter.
+}
+
+namespace {
+ struct A { int n; };
+ struct B { int n; };
+ struct C : A, B {};
+ struct D { double d; C c; };
+ const int &&u = static_cast<B&&>(0, ((D&&)D{}).*&D::c).n; // both-warning {{left operand of comma operator has no effect}}
+}
+
+/// From SemaTemplate/instantiate-member-pointers.cpp
+namespace {
+ struct Y {
+ int x;
+ };
+
+ template<typename T, typename Class, T Class::*Ptr>
+ struct X3 {
+ X3<T, Class, Ptr> &operator=(const T& value) {
+ return *this;
+ }
+ };
+
+ typedef int Y::*IntMember;
+ template<IntMember Member>
+ struct X4 {
+ X3<int, Y, Member> member;
+ int &getMember(Y& y) { return y.*Member; }
+ };
+
+ int &get_X4(X4<&Y::x> x4, Y& y) {
+ return x4.getMember(y);
+ }
+}
+
+/// From test/CXX/basic/basic.def.odr/p2.cpp
+namespace {
+ void use(int);
+ struct S { int x; int f() const; };
+ constexpr S *ps = nullptr;
+ S *const &psr = ps;
+
+ void test() {
+ use(ps->*&S::x);
+ use(psr->*&S::x);
+ }
+}
diff --git a/clang/test/AST/ast-dump-default-init-json.cpp b/clang/test/AST/ast-dump-default-init-json.cpp
index f4949a9..1058b4e 100644
--- a/clang/test/AST/ast-dump-default-init-json.cpp
+++ b/clang/test/AST/ast-dump-default-init-json.cpp
@@ -789,10 +789,10 @@ void test() {
// CHECK-NEXT: "valueCategory": "lvalue",
// CHECK-NEXT: "extendingDecl": {
// CHECK-NEXT: "id": "0x{{.*}}",
-// CHECK-NEXT: "kind": "VarDecl",
-// CHECK-NEXT: "name": "b",
+// CHECK-NEXT: "kind": "FieldDecl",
+// CHECK-NEXT: "name": "a",
// CHECK-NEXT: "type": {
-// CHECK-NEXT: "qualType": "B"
+// CHECK-NEXT: "qualType": "const A &"
// CHECK-NEXT: }
// CHECK-NEXT: },
// CHECK-NEXT: "storageDuration": "automatic",
diff --git a/clang/test/AST/ast-dump-default-init.cpp b/clang/test/AST/ast-dump-default-init.cpp
index 26864fb..15b29f0 100644
--- a/clang/test/AST/ast-dump-default-init.cpp
+++ b/clang/test/AST/ast-dump-default-init.cpp
@@ -13,7 +13,7 @@ void test() {
}
// CHECK: -CXXDefaultInitExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue has rewritten init
// CHECK-NEXT: `-ExprWithCleanups 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue
-// CHECK-NEXT: `-MaterializeTemporaryExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue extended by Var 0x{{[^ ]*}} 'b' 'B'
+// CHECK-NEXT: `-MaterializeTemporaryExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue extended by Field 0x{{[^ ]*}} 'a' 'const A &'
// CHECK-NEXT: `-ImplicitCastExpr 0x{{[^ ]*}} <{{.*}}> 'const A' <NoOp>
// CHECK-NEXT: `-CXXFunctionalCastExpr 0x{{[^ ]*}} <{{.*}}> 'A' functional cast to A <NoOp>
// CHECK-NEXT: `-InitListExpr 0x{{[^ ]*}} <{{.*}}> 'A'
diff --git a/clang/test/AST/ast-print-openacc-loop-construct.cpp b/clang/test/AST/ast-print-openacc-loop-construct.cpp
index 519825b..cde302a 100644
--- a/clang/test/AST/ast-print-openacc-loop-construct.cpp
+++ b/clang/test/AST/ast-print-openacc-loop-construct.cpp
@@ -48,4 +48,13 @@ void foo() {
// CHECK-NEXT: ;
#pragma acc loop auto
for(;;);
+
+ int i;
+ float array[5];
+
+// CHECK: #pragma acc loop private(i, array[1], array, array[1:2])
+// CHECK-NEXT: for (;;)
+// CHECK-NEXT: ;
+#pragma acc loop private(i, array[1], array, array[1:2])
+ for(;;);
}
diff --git a/clang/test/Analysis/cxx-uninitialized-object.cpp b/clang/test/Analysis/cxx-uninitialized-object.cpp
index aee0dae..e3fa8ae 100644
--- a/clang/test/Analysis/cxx-uninitialized-object.cpp
+++ b/clang/test/Analysis/cxx-uninitialized-object.cpp
@@ -1114,27 +1114,27 @@ void fCXX11MemberInitTest1() {
CXX11MemberInitTest1();
}
-#ifdef PEDANTIC
struct CXX11MemberInitTest2 {
struct RecordType {
- int a; // expected-note {{uninitialized field 'this->a'}}
- int b; // expected-note {{uninitialized field 'this->b'}}
+ // TODO: we'd expect the note: {{uninitialized field 'this->rec.a'}}
+ int a; // no-note
+ // TODO: we'd expect the note: {{uninitialized field 'this->rec.b'}}
+ int b; // no-note
RecordType(int) {}
};
- RecordType rec = RecordType(int()); // expected-warning {{2 uninitialized fields}}
+ RecordType rec = RecordType(int());
int dontGetFilteredByNonPedanticMode = 0;
CXX11MemberInitTest2() {}
};
void fCXX11MemberInitTest2() {
+ // TODO: we'd expect the warning: {{2 uninitializeds field}}
CXX11MemberInitTest2(); // no-warning
}
-#endif // PEDANTIC
-
//===----------------------------------------------------------------------===//
// "Esoteric" primitive type tests.
//===----------------------------------------------------------------------===//
diff --git a/clang/test/Analysis/lifetime-extended-regions.cpp b/clang/test/Analysis/lifetime-extended-regions.cpp
index 524f4e0..4e98bd4 100644
--- a/clang/test/Analysis/lifetime-extended-regions.cpp
+++ b/clang/test/Analysis/lifetime-extended-regions.cpp
@@ -120,11 +120,11 @@ void aggregateWithReferences() {
clang_analyzer_dump(viaReference); // expected-warning-re {{&lifetime_extended_object{RefAggregate, viaReference, S{{[0-9]+}}} }}
clang_analyzer_dump(viaReference.rx); // expected-warning-re {{&lifetime_extended_object{int, viaReference, S{{[0-9]+}}} }}
clang_analyzer_dump(viaReference.ry); // expected-warning-re {{&lifetime_extended_object{Composite, viaReference, S{{[0-9]+}}} }}
-
- // The lifetime lifetime of object bound to reference members of aggregates,
- // that are created from default member initializer was extended.
- RefAggregate defaultInitExtended{i};
- clang_analyzer_dump(defaultInitExtended.ry); // expected-warning-re {{&lifetime_extended_object{Composite, defaultInitExtended, S{{[0-9]+}}} }}
+
+ // clang does not currently implement extending lifetime of object bound to reference members of aggregates,
+ // that are created from default member initializer (see `warn_unsupported_lifetime_extension` from `-Wdangling`)
+ RefAggregate defaultInitExtended{i}; // clang-bug does not extend `Composite`
+ clang_analyzer_dump(defaultInitExtended.ry); // expected-warning {{Unknown }}
}
void lambda() {
diff --git a/clang/test/CXX/drs/cwg16xx.cpp b/clang/test/CXX/drs/cwg16xx.cpp
index 82ef871..cf6b45c 100644
--- a/clang/test/CXX/drs/cwg16xx.cpp
+++ b/clang/test/CXX/drs/cwg16xx.cpp
@@ -483,6 +483,8 @@ namespace cwg1696 { // cwg1696: 7
const A &a = A(); // #cwg1696-D1-a
};
D1 d1 = {}; // #cwg1696-d1
+ // since-cxx14-warning@-1 {{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported; lifetime of temporary will end at the end of the full-expression}}
+ // since-cxx14-note@#cwg1696-D1-a {{initializing field 'a' with default member initializer}}
struct D2 {
const A &a = A(); // #cwg1696-D2-a
diff --git a/clang/test/CXX/drs/cwg18xx.cpp b/clang/test/CXX/drs/cwg18xx.cpp
index 054ce5a..323e56f 100644
--- a/clang/test/CXX/drs/cwg18xx.cpp
+++ b/clang/test/CXX/drs/cwg18xx.cpp
@@ -206,28 +206,19 @@ namespace cwg1814 { // cwg1814: yes
#endif
}
-namespace cwg1815 { // cwg1815: 19
+namespace cwg1815 { // cwg1815: no
#if __cplusplus >= 201402L
- struct A { int &&r = 0; };
+ // FIXME: needs codegen test
+ struct A { int &&r = 0; }; // #cwg1815-A
A a = {};
+ // since-cxx14-warning@-1 {{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported; lifetime of temporary will end at the end of the full-expression}} FIXME
+ // since-cxx14-note@#cwg1815-A {{initializing field 'r' with default member initializer}}
struct B { int &&r = 0; }; // #cwg1815-B
// since-cxx14-error@-1 {{reference member 'r' binds to a temporary object whose lifetime would be shorter than the lifetime of the constructed object}}
// since-cxx14-note@#cwg1815-B {{initializing field 'r' with default member initializer}}
// since-cxx14-note@#cwg1815-b {{in implicit default constructor for 'cwg1815::B' first required here}}
B b; // #cwg1815-b
-
-#if __cplusplus >= 201703L
- struct C { const int &r = 0; };
- constexpr C c = {}; // OK, since cwg1815
- static_assert(c.r == 0);
-
- constexpr int f() {
- A a = {}; // OK, since cwg1815
- return a.r;
- }
- static_assert(f() == 0);
-#endif
#endif
}
diff --git a/clang/test/CXX/special/class.temporary/p6.cpp b/clang/test/CXX/special/class.temporary/p6.cpp
index a6d2adf..5554363 100644
--- a/clang/test/CXX/special/class.temporary/p6.cpp
+++ b/clang/test/CXX/special/class.temporary/p6.cpp
@@ -269,40 +269,6 @@ void init_capture_init_list() {
// CHECK: }
}
-void check_dr1815() { // dr1815: yes
-#if __cplusplus >= 201402L
-
- struct A {
- int &&r = 0;
- ~A() {}
- };
-
- struct B {
- A &&a = A{};
- ~B() {}
- };
- B a = {};
-
- // CHECK: call {{.*}}block_scope_begin_function
- extern void block_scope_begin_function();
- extern void block_scope_end_function();
- block_scope_begin_function();
- {
- // CHECK: call void @_ZZ12check_dr1815vEN1BD1Ev
- // CHECK: call void @_ZZ12check_dr1815vEN1AD1Ev
- B b = {};
- }
- // CHECK: call {{.*}}block_scope_end_function
- block_scope_end_function();
-
- // CHECK: call {{.*}}some_other_function
- extern void some_other_function();
- some_other_function();
- // CHECK: call void @_ZZ12check_dr1815vEN1BD1Ev
- // CHECK: call void @_ZZ12check_dr1815vEN1AD1Ev
-#endif
-}
-
namespace P2718R0 {
namespace basic {
template <typename E> using T2 = std::list<E>;
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c
index 13748be..b87b225 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c
@@ -16,399 +16,399 @@
#include <riscv_vector.h>
-// CHECK-LABEL: @test_vcpopv_v_u8mf8(
+// CHECK-LABEL: @test_vcpop_v_u8mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.nxv1i8.i64(<vscale x 1 x i8> poison, <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8(vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8(vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8(vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4(
+// CHECK-LABEL: @test_vcpop_v_u8mf4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.nxv2i8.i64(<vscale x 2 x i8> poison, <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4(vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4(vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4(vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2(
+// CHECK-LABEL: @test_vcpop_v_u8mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.nxv4i8.i64(<vscale x 4 x i8> poison, <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2(vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2(vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2(vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1(
+// CHECK-LABEL: @test_vcpop_v_u8m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1(vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1(vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1(vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2(
+// CHECK-LABEL: @test_vcpop_v_u8m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.nxv16i8.i64(<vscale x 16 x i8> poison, <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2(vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2(vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2(vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4(
+// CHECK-LABEL: @test_vcpop_v_u8m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.nxv32i8.i64(<vscale x 32 x i8> poison, <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4(vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4(vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4(vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8(
+// CHECK-LABEL: @test_vcpop_v_u8m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8(vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8(vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8(vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4(
+// CHECK-LABEL: @test_vcpop_v_u16mf4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4(vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4(vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4(vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2(
+// CHECK-LABEL: @test_vcpop_v_u16mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.nxv2i16.i64(<vscale x 2 x i16> poison, <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2(vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2(vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2(vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1(
+// CHECK-LABEL: @test_vcpop_v_u16m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1(vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1(vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1(vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2(
+// CHECK-LABEL: @test_vcpop_v_u16m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2(vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2(vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2(vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4(
+// CHECK-LABEL: @test_vcpop_v_u16m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4(vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4(vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4(vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8(
+// CHECK-LABEL: @test_vcpop_v_u16m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.nxv32i16.i64(<vscale x 32 x i16> poison, <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8(vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8(vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8(vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2(
+// CHECK-LABEL: @test_vcpop_v_u32mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2(vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2(vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2(vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1(
+// CHECK-LABEL: @test_vcpop_v_u32m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1(vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1(vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1(vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2(
+// CHECK-LABEL: @test_vcpop_v_u32m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2(vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2(vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2(vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4(
+// CHECK-LABEL: @test_vcpop_v_u32m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.nxv8i32.i64(<vscale x 8 x i32> poison, <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4(vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4(vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4(vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8(
+// CHECK-LABEL: @test_vcpop_v_u32m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.nxv16i32.i64(<vscale x 16 x i32> poison, <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8(vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8(vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8(vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1(
+// CHECK-LABEL: @test_vcpop_v_u64m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.nxv1i64.i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1(vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1(vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1(vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2(
+// CHECK-LABEL: @test_vcpop_v_u64m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.nxv2i64.i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2(vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2(vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2(vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4(
+// CHECK-LABEL: @test_vcpop_v_u64m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.nxv4i64.i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4(vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4(vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4(vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8(
+// CHECK-LABEL: @test_vcpop_v_u64m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.nxv8i64.i64(<vscale x 8 x i64> poison, <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8(vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8(vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8(vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> poison, <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_m(vbool64_t mask, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_m(mask, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_m(vbool64_t mask, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> poison, <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_m(vbool32_t mask, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_m(mask, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_m(vbool32_t mask, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> poison, <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_m(vbool16_t mask, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_m(mask, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_m(vbool16_t mask, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_m(
+// CHECK-LABEL: @test_vcpop_v_u8m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_m(vbool8_t mask, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_m(mask, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_m(vbool8_t mask, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_m(
+// CHECK-LABEL: @test_vcpop_v_u8m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> poison, <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_m(vbool4_t mask, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_m(mask, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_m(vbool4_t mask, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_m(
+// CHECK-LABEL: @test_vcpop_v_u8m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> poison, <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_m(mask, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_m(
+// CHECK-LABEL: @test_vcpop_v_u8m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_m(vbool1_t mask, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_m(mask, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_m(vbool1_t mask, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_m(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_m(vbool64_t mask, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_m(mask, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_m(vbool64_t mask, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> poison, <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_m(mask, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_m(
+// CHECK-LABEL: @test_vcpop_v_u16m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_m(mask, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_m(
+// CHECK-LABEL: @test_vcpop_v_u16m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_m(vbool8_t mask, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_m(mask, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_m(vbool8_t mask, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_m(
+// CHECK-LABEL: @test_vcpop_v_u16m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_m(vbool4_t mask, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_m(mask, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_m(vbool4_t mask, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_m(
+// CHECK-LABEL: @test_vcpop_v_u16m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> poison, <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_m(vbool2_t mask, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_m(mask, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_m(vbool2_t mask, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_m(vbool64_t mask, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_m(mask, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_m(vbool64_t mask, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_m(
+// CHECK-LABEL: @test_vcpop_v_u32m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_m(mask, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_m(
+// CHECK-LABEL: @test_vcpop_v_u32m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_m(vbool16_t mask, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_m(mask, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_m(vbool16_t mask, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_m(
+// CHECK-LABEL: @test_vcpop_v_u32m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> poison, <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_m(vbool8_t mask, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_m(mask, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_m(vbool8_t mask, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_m(
+// CHECK-LABEL: @test_vcpop_v_u32m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> poison, <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_m(mask, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_m(
+// CHECK-LABEL: @test_vcpop_v_u64m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_m(mask, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_m(
+// CHECK-LABEL: @test_vcpop_v_u64m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_m(mask, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_m(
+// CHECK-LABEL: @test_vcpop_v_u64m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_m(vbool16_t mask, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_m(mask, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_m(vbool16_t mask, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_m(
+// CHECK-LABEL: @test_vcpop_v_u64m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> poison, <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_m(vbool8_t mask, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_m(mask, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_m(vbool8_t mask, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_m(mask, vs2, vl);
}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c
index adb0ac9..5625b19 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c
@@ -16,399 +16,399 @@
#include <riscv_vector.h>
-// CHECK-LABEL: @test_vcpopv_v_u8mf8(
+// CHECK-LABEL: @test_vcpop_v_u8mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.nxv1i8.i64(<vscale x 1 x i8> poison, <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8(vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8(vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4(
+// CHECK-LABEL: @test_vcpop_v_u8mf4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.nxv2i8.i64(<vscale x 2 x i8> poison, <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4(vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4(vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2(
+// CHECK-LABEL: @test_vcpop_v_u8mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.nxv4i8.i64(<vscale x 4 x i8> poison, <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2(vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2(vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1(
+// CHECK-LABEL: @test_vcpop_v_u8m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1(vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1(vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2(
+// CHECK-LABEL: @test_vcpop_v_u8m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.nxv16i8.i64(<vscale x 16 x i8> poison, <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2(vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2(vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4(
+// CHECK-LABEL: @test_vcpop_v_u8m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.nxv32i8.i64(<vscale x 32 x i8> poison, <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4(vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4(vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8(
+// CHECK-LABEL: @test_vcpop_v_u8m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8(vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8(vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4(
+// CHECK-LABEL: @test_vcpop_v_u16mf4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4(vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4(vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2(
+// CHECK-LABEL: @test_vcpop_v_u16mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.nxv2i16.i64(<vscale x 2 x i16> poison, <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2(vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2(vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1(
+// CHECK-LABEL: @test_vcpop_v_u16m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1(vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1(vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2(
+// CHECK-LABEL: @test_vcpop_v_u16m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2(vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2(vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4(
+// CHECK-LABEL: @test_vcpop_v_u16m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4(vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4(vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8(
+// CHECK-LABEL: @test_vcpop_v_u16m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.nxv32i16.i64(<vscale x 32 x i16> poison, <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8(vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8(vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2(
+// CHECK-LABEL: @test_vcpop_v_u32mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2(vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2(vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1(
+// CHECK-LABEL: @test_vcpop_v_u32m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1(vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1(vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2(
+// CHECK-LABEL: @test_vcpop_v_u32m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2(vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2(vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4(
+// CHECK-LABEL: @test_vcpop_v_u32m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.nxv8i32.i64(<vscale x 8 x i32> poison, <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4(vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4(vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8(
+// CHECK-LABEL: @test_vcpop_v_u32m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.nxv16i32.i64(<vscale x 16 x i32> poison, <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8(vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8(vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1(
+// CHECK-LABEL: @test_vcpop_v_u64m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.nxv1i64.i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1(vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1(vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2(
+// CHECK-LABEL: @test_vcpop_v_u64m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.nxv2i64.i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2(vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2(vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4(
+// CHECK-LABEL: @test_vcpop_v_u64m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.nxv4i64.i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4(vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4(vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8(
+// CHECK-LABEL: @test_vcpop_v_u64m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.nxv8i64.i64(<vscale x 8 x i64> poison, <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8(vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8(vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> poison, <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_m(vbool64_t mask, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_m(vbool64_t mask, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> poison, <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_m(vbool32_t mask, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_m(vbool32_t mask, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> poison, <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_m(vbool16_t mask, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_m(vbool16_t mask, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_m(
+// CHECK-LABEL: @test_vcpop_v_u8m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_m(vbool8_t mask, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_m(vbool8_t mask, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_m(
+// CHECK-LABEL: @test_vcpop_v_u8m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> poison, <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_m(vbool4_t mask, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_m(vbool4_t mask, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_m(
+// CHECK-LABEL: @test_vcpop_v_u8m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> poison, <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_m(
+// CHECK-LABEL: @test_vcpop_v_u8m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_m(vbool1_t mask, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_m(vbool1_t mask, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_m(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_m(vbool64_t mask, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_m(vbool64_t mask, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> poison, <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_m(
+// CHECK-LABEL: @test_vcpop_v_u16m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_m(
+// CHECK-LABEL: @test_vcpop_v_u16m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_m(vbool8_t mask, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_m(vbool8_t mask, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_m(
+// CHECK-LABEL: @test_vcpop_v_u16m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_m(vbool4_t mask, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_m(vbool4_t mask, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_m(
+// CHECK-LABEL: @test_vcpop_v_u16m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> poison, <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_m(vbool2_t mask, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_m(vbool2_t mask, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_m(vbool64_t mask, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_m(vbool64_t mask, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_m(
+// CHECK-LABEL: @test_vcpop_v_u32m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_m(
+// CHECK-LABEL: @test_vcpop_v_u32m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_m(vbool16_t mask, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_m(vbool16_t mask, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_m(
+// CHECK-LABEL: @test_vcpop_v_u32m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> poison, <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_m(vbool8_t mask, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_m(vbool8_t mask, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_m(
+// CHECK-LABEL: @test_vcpop_v_u32m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> poison, <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_m(
+// CHECK-LABEL: @test_vcpop_v_u64m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_m(
+// CHECK-LABEL: @test_vcpop_v_u64m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_m(
+// CHECK-LABEL: @test_vcpop_v_u64m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_m(vbool16_t mask, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_m(vbool16_t mask, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_m(
+// CHECK-LABEL: @test_vcpop_v_u64m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> poison, <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_m(vbool8_t mask, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_m(vbool8_t mask, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c
index 8a1f2e1..3a11033 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c
@@ -16,795 +16,795 @@
#include <riscv_vector.h>
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tu(vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_tu(maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tu(vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tu(vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_tu(maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tu(vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tu(vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_tu(maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tu(vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tu(vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_tu(maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tu(vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tu(vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_tu(maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tu(vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tu(vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_tu(maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tu(vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tu(vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_tu(maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tu(vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_tu(maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_tu(maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tu(vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_tu(maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tu(vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tu(vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_tu(maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tu(vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tu(vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_tu(maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tu(vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tu(vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_tu(maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tu(vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_tu(maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_tu(maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_tu(maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_tu(maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_tu(maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tu(vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_tu(maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tu(vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tu(vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_tu(maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tu(vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tu(vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_tu(maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tu(vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tu(vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_tu(maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tu(vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tum(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_tum(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tum(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tum(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_tum(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tum(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tum(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_tum(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tum(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tum(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_tum(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tum(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tum(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_tum(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tum(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tum(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_tum(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tum(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tum(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_tum(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tum(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tum(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_tum(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_tum(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_tum(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_tum(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_tum(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_tum(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_tum(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_tum(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_tum(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_tum(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_tum(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_tum(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_tum(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_tum(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_tum(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tumu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_tumu(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tumu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tumu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_tumu(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tumu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tumu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_tumu(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tumu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_tumu(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tumu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_tumu(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tumu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tumu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_tumu(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tumu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tumu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_tumu(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tumu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_tumu(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_tumu(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_tumu(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_tumu(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_tumu(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_tumu(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_tumu(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_tumu(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_tumu(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_tumu(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_tumu(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_tumu(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_tumu(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_tumu(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_tumu(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_mu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_mu(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_mu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_mu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_mu(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_mu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_mu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_mu(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_mu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_mu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_mu(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_mu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_mu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_mu(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_mu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_mu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_mu(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_mu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_mu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_mu(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_mu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_mu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_mu(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_mu(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_mu(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_mu(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_mu(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_mu(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_mu(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_mu(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_mu(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_mu(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_mu(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_mu(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_mu(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_mu(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_mu(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_mu(mask, maskedoff, vs2, vl);
}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c
index 02a499d..953ccac 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c
@@ -16,795 +16,795 @@
#include <riscv_vector.h>
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tu(vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tu(vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tu(vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tu(vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tu(vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tu(vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tu(vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tu(vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tu(vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tu(vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tu(vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tu(vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tu(vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tu(vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tu(vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tu(vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tu(vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tu(vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tu(vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tu(vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tu(vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tu(vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tu(vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tu(vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tu(vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tu(vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tu(vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tu(vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tu(vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tu(vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tum(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tum(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tum(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tum(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tum(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tum(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tum(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tum(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tum(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tum(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tum(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tum(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tum(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tum(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tum(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tumu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tumu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tumu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tumu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tumu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tumu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tumu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tumu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tumu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tumu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tumu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tumu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_mu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_mu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_mu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_mu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_mu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_mu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_mu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_mu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_mu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_mu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_mu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_mu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_mu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_mu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_mu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
diff --git a/clang/test/CodeGen/voidptr-vaarg.c b/clang/test/CodeGen/voidptr-vaarg.c
new file mode 100644
index 0000000..d023ddf
--- /dev/null
+++ b/clang/test/CodeGen/voidptr-vaarg.c
@@ -0,0 +1,478 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// Multiple targets use emitVoidPtrVAArg to lower va_arg instructions in clang
+// PPC is complicated, excluding from this case analysis
+// ForceRightAdjust is false for all non-PPC targets
+// AllowHigherAlign is only false for two Microsoft targets, both of which
+// pass most things by reference.
+//
+// Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
+// QualType ValueTy, bool IsIndirect,
+// TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign,
+// bool AllowHigherAlign, bool ForceRightAdjust =
+// false);
+//
+// Target IsIndirect SlotSize AllowHigher ForceRightAdjust
+// ARC false four true false
+// ARM varies four true false
+// Mips false 4 or 8 true false
+// RISCV varies register true false
+// PPC elided
+// LoongArch varies register true false
+// NVPTX WIP
+// AMDGPU WIP
+// X86_32 false four true false
+// X86_64 MS varies eight false false
+// CSKY false four true false
+// Webassembly varies four true false
+// AArch64 false eight true false
+// AArch64 MS false eight false false
+//
+// Webassembly passes indirectly iff it's an aggregate of multiple values
+// Choosing this as a representative architecture to check IR generation
+// partly because it has a relatively simple variadic calling convention.
+
+// Int, by itself and packed in structs
+// CHECK-LABEL: @raw_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int raw_int(__builtin_va_list list) { return __builtin_va_arg(list, int); }
+
+typedef struct {
+ int x;
+} one_int_t;
+
+// CHECK-LABEL: @one_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_INT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+one_int_t one_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_int_t);
+}
+
+typedef struct {
+ int x;
+ int y;
+} two_int_t;
+
+// CHECK-LABEL: @two_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT:%.*]], ptr align 4 [[TMP0]], i32 8, i1 false)
+// CHECK-NEXT: ret void
+//
+two_int_t two_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, two_int_t);
+}
+
+// Double, by itself and packed in structs
+// CHECK-LABEL: @raw_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+double raw_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, double);
+}
+
+typedef struct {
+ double x;
+} one_double_t;
+
+// CHECK-LABEL: @one_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_DOUBLE_T:%.*]], align 8
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[RETVAL]], ptr align 8 [[ARGP_CUR_ALIGNED]], i32 8, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_DOUBLE_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+one_double_t one_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_double_t);
+}
+
+typedef struct {
+ double x;
+ double y;
+} two_double_t;
+
+// CHECK-LABEL: @two_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[AGG_RESULT:%.*]], ptr align 8 [[TMP0]], i32 16, i1 false)
+// CHECK-NEXT: ret void
+//
+two_double_t two_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, two_double_t);
+}
+
+// Scalar smaller than the slot size (C would promote a short to int)
+typedef struct {
+ char x;
+} one_char_t;
+
+// CHECK-LABEL: @one_char(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_CHAR_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_CHAR_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+one_char_t one_char(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_char_t);
+}
+
+typedef struct {
+ short x;
+} one_short_t;
+
+// CHECK-LABEL: @one_short(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_SHORT_T:%.*]], align 2
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 2, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_SHORT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[COERCE_DIVE]], align 2
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+one_short_t one_short(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_short_t);
+}
+
+// Composite smaller than the slot size
+typedef struct {
+ _Alignas(2) char x;
+ char y;
+} char_pair_t;
+
+// CHECK-LABEL: @char_pair(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT:%.*]], ptr align 2 [[TMP0]], i32 2, i1 false)
+// CHECK-NEXT: ret void
+//
+char_pair_t char_pair(__builtin_va_list list) {
+ return __builtin_va_arg(list, char_pair_t);
+}
+
+// Empty struct
+typedef struct {
+} empty_t;
+
+// CHECK-LABEL: @empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 0
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 0, i1 false)
+// CHECK-NEXT: ret void
+//
+empty_t empty(__builtin_va_list list) {
+ return __builtin_va_arg(list, empty_t);
+}
+
+typedef struct {
+ empty_t x;
+ int y;
+} empty_int_t;
+
+// CHECK-LABEL: @empty_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+empty_int_t empty_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, empty_int_t);
+}
+
+typedef struct {
+ int x;
+ empty_t y;
+} int_empty_t;
+
+// CHECK-LABEL: @int_empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT_EMPTY_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT_EMPTY_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int_empty_t int_empty(__builtin_va_list list) {
+ return __builtin_va_arg(list, int_empty_t);
+}
+
+// Need multiple va_arg instructions to check the postincrement
+// Using types that are passed directly as the indirect handling
+// is independent of the alignment handling in emitVoidPtrDirectVAArg.
+
+// CHECK-LABEL: @multiple_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT: ret void
+//
+void multiple_int(__builtin_va_list list, int *out0, int *out1, int *out2) {
+ *out0 = __builtin_va_arg(list, int);
+ *out1 = __builtin_va_arg(list, int);
+ *out2 = __builtin_va_arg(list, int);
+}
+
+// Scalars in structs are an easy way of specifying alignment from C
+// CHECK-LABEL: @increasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP0]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP1]], ptr align 4 [[ARGP_CUR1]], i32 2, i1 false)
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 7
+// CHECK-NEXT: [[ARGP_CUR5_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP4]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARGP_CUR5_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT: ret void
+//
+void increasing_alignment(__builtin_va_list list, one_char_t *out0,
+ one_short_t *out1, int *out2, double *out3) {
+ *out0 = __builtin_va_arg(list, one_char_t);
+ *out1 = __builtin_va_arg(list, one_short_t);
+ *out2 = __builtin_va_arg(list, int);
+ *out3 = __builtin_va_arg(list, double);
+}
+
+// CHECK-LABEL: @decreasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP1]], ptr [[TMP2]], align 8
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP5]], ptr align 4 [[ARGP_CUR3]], i32 2, i1 false)
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP6]], ptr align 4 [[ARGP_CUR5]], i32 1, i1 false)
+// CHECK-NEXT: ret void
+//
+void decreasing_alignment(__builtin_va_list list, double *out0, int *out1,
+ one_short_t *out2, one_char_t *out3) {
+ *out0 = __builtin_va_arg(list, double);
+ *out1 = __builtin_va_arg(list, int);
+ *out2 = __builtin_va_arg(list, one_short_t);
+ *out3 = __builtin_va_arg(list, one_char_t);
+}
+
+// Typical edge cases, none hit special handling in VAArg lowering.
+typedef struct {
+ int x[16];
+ double y[8];
+} large_value_t;
+
+// CHECK-LABEL: @large_value(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 8 [[TMP1]], i32 128, i1 false)
+// CHECK-NEXT: ret void
+//
+void large_value(__builtin_va_list list, large_value_t *out) {
+ *out = __builtin_va_arg(list, large_value_t);
+}
+
+typedef int v128_t __attribute__((__vector_size__(16), __aligned__(16)));
+// CHECK-LABEL: @vector(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -16)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 16
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARGP_CUR_ALIGNED]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+// CHECK-NEXT: ret void
+//
+void vector(__builtin_va_list list, v128_t *out) {
+ *out = __builtin_va_arg(list, v128_t);
+}
+
+typedef struct BF {
+ float not_an_i32[2];
+ int A : 1;
+ char B;
+ int C : 13;
+} BF;
+
+// CHECK-LABEL: @bitfield(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[TMP1]], i32 12, i1 false)
+// CHECK-NEXT: ret void
+//
+void bitfield(__builtin_va_list list, BF *out) {
+ *out = __builtin_va_arg(list, BF);
+}
diff --git a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
index ba5e5f1..7880a80 100644
--- a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
+++ b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
@@ -6,21 +6,21 @@
__attribute__((global))
void kernel(int *out) {
int i = 0;
- out[i++] = threadIdx.x; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- out[i++] = threadIdx.y; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.y()
- out[i++] = threadIdx.z; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ out[i++] = threadIdx.x; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ out[i++] = threadIdx.y; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ out[i++] = threadIdx.z; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.z()
- out[i++] = blockIdx.x; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
- out[i++] = blockIdx.y; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
- out[i++] = blockIdx.z; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+ out[i++] = blockIdx.x; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ out[i++] = blockIdx.y; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+ out[i++] = blockIdx.z; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
- out[i++] = blockDim.x; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- out[i++] = blockDim.y; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
- out[i++] = blockDim.z; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ out[i++] = blockDim.x; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ out[i++] = blockDim.y; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ out[i++] = blockDim.z; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
- out[i++] = gridDim.x; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
- out[i++] = gridDim.y; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
- out[i++] = gridDim.z; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+ out[i++] = gridDim.x; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+ out[i++] = gridDim.y; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+ out[i++] = gridDim.z; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
out[i++] = warpSize; // CHECK: store i32 32,
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
new file mode 100644
index 0000000..a0673b9
--- /dev/null
+++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
@@ -0,0 +1,181 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+// REQUIRES: webassembly-registered-target
+
+// Simple calls to known variadic functions that are completely elided when
+// optimisations are on This is a functional check that the expand-variadic pass
+// is consistent with clang's va_arg handling
+
+// When expand-variadics is added to the default pipeline, clang -O1 will
+// suffice here -Wno-varargs avoids warning second argument to 'va_start' is not
+// the last named parameter
+
+// RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -Wno-varargs -O1 -emit-llvm -o - | opt - -S --passes='module(expand-variadics,default<O1>)' --expand-variadics-override=optimize -o - | FileCheck %s
+
+#include <stdarg.h>
+#include <stdint.h>
+
+template <typename X, typename Y> static X first(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ X r = va_arg(va, X);
+ va_end(va);
+ return r;
+}
+
+template <typename X, typename Y> static Y second(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ va_arg(va, X);
+ Y r = va_arg(va, Y);
+ va_end(va);
+ return r;
+}
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_i32
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_pair_i32(int x, int y) { return first<int, int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_i32
+// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_pair_i32(int x, int y) { return second<int, int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_f64
+// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_pair_f64(double x, double y) {
+ return first<double, double>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_f64
+// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_pair_f64(double x, double y) {
+ return second<double, double>(x, y);
+}
+}
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_f64
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_f64(int x, double y) { return first<int, double>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_f64
+// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_i32_f64(int x, double y) { return second<int, double>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_f64_i32
+// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_f64_i32(double x, int y) { return first<double, int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_f64_i32
+// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_f64_i32(double x, int y) { return second<double, int>(x, y); }
+}
+
+extern "C" {
+typedef uint64_t ulong2 __attribute__((__vector_size__(16), __aligned__(16)));
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_ulong2
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_ulong2(int x, ulong2 *y) { return first<int, ulong2>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_ulong2
+// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) {
+ *r = second<int, ulong2>(x, *y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@first_ulong2_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void first_ulong2_i32(ulong2 *x, int y, ulong2 *r) {
+ *r = first<ulong2, int>(*x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_ulong2_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_ulong2_i32(ulong2 *x, int y) { return second<ulong2, int>(*x, y); }
+}
+
+// ascending alignment
+typedef struct {
+ char c;
+ short s;
+ int i;
+ long l;
+ float f;
+ double d;
+} asc;
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_asc
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_asc(int x, asc *y) { return first<int, asc>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_asc
+// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[Y]], i32 24, i1 false)
+// CHECK-NEXT: ret void
+//
+void second_i32_asc(int x, asc *y, asc *r) { *r = second<int, asc>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_asc_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[X]], i32 24, i1 false)
+// CHECK-NEXT: ret void
+//
+void first_asc_i32(asc *x, int y, asc *r) { *r = first<asc, int>(*x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_asc_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_asc_i32(asc *x, int y) { return second<asc, int>(*x, y); }
+}
diff --git a/clang/test/CodeGenCXX/pointers-to-data-members.cpp b/clang/test/CodeGenCXX/pointers-to-data-members.cpp
index 29f1c3f..cf1d6c0 100644
--- a/clang/test/CodeGenCXX/pointers-to-data-members.cpp
+++ b/clang/test/CodeGenCXX/pointers-to-data-members.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -emit-llvm -o %t.ll -triple=x86_64-apple-darwin10
+// RUN: %clang_cc1 %s -emit-llvm -o %t.ll -triple=x86_64-apple-darwin10 -fexperimental-new-constant-interpreter
// RUN: FileCheck %s < %t.ll
// RUN: FileCheck -check-prefix=CHECK-GLOBAL %s < %t.ll
diff --git a/clang/test/CodeGenCXX/template-param-objects-linkage.cpp b/clang/test/CodeGenCXX/template-param-objects-linkage.cpp
index 63e7d8c..9c148ed 100644
--- a/clang/test/CodeGenCXX/template-param-objects-linkage.cpp
+++ b/clang/test/CodeGenCXX/template-param-objects-linkage.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 %s -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s
struct S { char buf[32]; };
template<S s> constexpr const char* f() { return s.buf; }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 2fda52d..854ab39 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -49,6 +49,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1150 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1151 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1152 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1201 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1201 %s
@@ -100,6 +101,7 @@
// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index d17ff81..6606178 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -5,6 +5,7 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s
typedef unsigned int uint;
typedef unsigned long ulong;
diff --git a/clang/test/Driver/aarch64-oryon-1.c b/clang/test/Driver/aarch64-oryon-1.c
new file mode 100644
index 0000000..952ba5d
--- /dev/null
+++ b/clang/test/Driver/aarch64-oryon-1.c
@@ -0,0 +1,19 @@
+// RUN: %clang -target aarch64 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s
+// RUN: %clang -target aarch64 -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s
+// RUN: %clang -target aarch64 -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s
+// Phoenix: "-cc1"{{.*}} "-triple" "aarch64{{(--)?}}"{{.*}} "-target-cpu" "oryon-1" "-target-feature" "+v8.6a"
+// Phoenix-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{(--)?}}"{{.*}} "-target-cpu" "generic"
+
+// RUN: %clang -target arm64 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix %s
+// RUN: %clang -target arm64 -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix %s
+// RUN: %clang -target arm64 -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix-TUNE %s
+// ARM64-Phoenix: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "oryon-1" "-target-feature" "+v8.6a"
+// ARM64-Phoenix-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
+
+// RUN: %clang -target aarch64 -mcpu=oryon-1 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-Phoenix %s
+// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-Phoenix %s
+// MCPU-MTUNE-Phoenix: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "oryon-1"
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index a878a7d..3e4a570 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -127,6 +127,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1103 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1103 -DFAMILY=GFX11
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1150 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1150 -DFAMILY=GFX11
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1151 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1151 -DFAMILY=GFX11
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1152 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1152 -DFAMILY=GFX11
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index 5b6a220..4b0ef92 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -112,6 +112,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx1103 %s 2>&1 | FileCheck --check-prefix=GFX1103 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1150 %s 2>&1 | FileCheck --check-prefix=GFX1150 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1151 %s 2>&1 | FileCheck --check-prefix=GFX1151 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1152 %s 2>&1 | FileCheck --check-prefix=GFX1152 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
@@ -164,6 +165,7 @@
// GFX1103: "-target-cpu" "gfx1103"
// GFX1150: "-target-cpu" "gfx1150"
// GFX1151: "-target-cpu" "gfx1151"
+// GFX1152: "-target-cpu" "gfx1152"
// GFX1200: "-target-cpu" "gfx1200"
// GFX1201: "-target-cpu" "gfx1201"
diff --git a/clang/test/Interpreter/pretty-print.c b/clang/test/Interpreter/pretty-print.c
new file mode 100644
index 0000000..f6158ad
--- /dev/null
+++ b/clang/test/Interpreter/pretty-print.c
@@ -0,0 +1,8 @@
+// REQUIRES: host-supports-jit
+// UNSUPPORTED: system-aix
+// RUN: cat %s | clang-repl -Xcc -xc | FileCheck %s
+// RUN: cat %s | clang-repl -Xcc -std=c++11 | FileCheck %s
+
+const char* c_str = "Hello, world!"; c_str
+
+// CHECK: Not implement yet.
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index bad1374..cb5b675 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -5,11 +5,11 @@
// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
// AARCH64: error: unknown target CPU 'not-a-cpu'
-// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, cobalt-100, grace{{$}}
+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}}
// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
-// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, cobalt-100, grace{{$}}
+// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}}
// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'
@@ -29,7 +29,7 @@
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx12-generic, gfx1200, gfx1201{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201{{$}}
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
@@ -37,7 +37,7 @@
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
-// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201, gfx9-generic, gfx10-1-generic, gfx10-3-generic, gfx11-generic, gfx12-generic{{$}}
+// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx1200, gfx1201, gfx9-generic, gfx10-1-generic, gfx10-3-generic, gfx11-generic, gfx12-generic{{$}}
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/SemaCXX/attr-weak.cpp b/clang/test/SemaCXX/attr-weak.cpp
index f065bfd..0f9a297 100644
--- a/clang/test/SemaCXX/attr-weak.cpp
+++ b/clang/test/SemaCXX/attr-weak.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -std=c++11 %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -std=c++11 %s -fexperimental-new-constant-interpreter
static int test0 __attribute__((weak)); // expected-error {{weak declaration cannot have internal linkage}}
static void test1() __attribute__((weak)); // expected-error {{weak declaration cannot have internal linkage}}
diff --git a/clang/test/SemaCXX/builtin-is-bitwise-cloneable-fsanitize.cpp b/clang/test/SemaCXX/builtin-is-bitwise-cloneable-fsanitize.cpp
new file mode 100644
index 0000000..d47a39a
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-is-bitwise-cloneable-fsanitize.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -DSANITIZER_ENABLED -fsanitize=address -fsanitize-address-field-padding=1 %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux %s
+
+struct S {
+ ~S() {}
+ virtual void foo() {}
+
+ int buffer[1];
+ int other_field = 0;
+};
+
+union U {
+ S s;
+};
+
+struct Derived : S {};
+
+static_assert(!__is_trivially_copyable(S));
+#ifdef SANITIZER_ENABLED
+// Don't allow memcpy when the struct has poisoned padding bits.
+// The sanitizer adds poison padding bits to struct S.
+static_assert(sizeof(S) > 16);
+static_assert(!__is_bitwise_cloneable(S));
+static_assert(sizeof(U) == sizeof(S)); // no padding bit for U.
+static_assert(!__is_bitwise_cloneable(U));
+static_assert(!__is_bitwise_cloneable(S[2]));
+static_assert(!__is_bitwise_cloneable(Derived));
+#else
+static_assert(sizeof(S) == 16);
+static_assert(__is_bitwise_cloneable(S));
+static_assert(__is_bitwise_cloneable(U));
+static_assert(__is_bitwise_cloneable(S[2]));
+static_assert(__is_bitwise_cloneable(Derived));
+#endif
diff --git a/clang/test/SemaCXX/builtin-is-bitwise-cloneable.cpp b/clang/test/SemaCXX/builtin-is-bitwise-cloneable.cpp
new file mode 100644
index 0000000..1781cf4
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-is-bitwise-cloneable.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+//
+struct DynamicClass { virtual int Foo(); };
+static_assert(!__is_trivially_copyable(DynamicClass));
+static_assert(__is_bitwise_cloneable(DynamicClass));
+
+struct InComplete; // expected-note{{forward declaration}}
+static_assert(!__is_bitwise_cloneable(InComplete)); // expected-error{{incomplete type 'InComplete' used in type trait expression}}
diff --git a/clang/test/SemaCXX/constexpr-default-arg.cpp b/clang/test/SemaCXX/constexpr-default-arg.cpp
index 901123b..ec9b292 100644
--- a/clang/test/SemaCXX/constexpr-default-arg.cpp
+++ b/clang/test/SemaCXX/constexpr-default-arg.cpp
@@ -32,8 +32,8 @@ void test_default_arg2() {
}
// Check that multiple CXXDefaultInitExprs don't cause an assertion failure.
-struct A { int &&r = 0; };
+struct A { int &&r = 0; }; // expected-note 2{{default member initializer}}
struct B { A x, y; };
-B b = {}; // expected-no-diagnostics
+B b = {}; // expected-warning 2{{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported}}
}
diff --git a/clang/test/SemaCXX/cxx11-default-member-initializers.cpp b/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
index 1ea8b98..dd8e9c6 100644
--- a/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
+++ b/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
@@ -27,80 +27,6 @@ class MemInit {
C m = s;
};
-namespace std {
-typedef decltype(sizeof(int)) size_t;
-
-// libc++'s implementation
-template <class _E> class initializer_list {
- const _E *__begin_;
- size_t __size_;
-
- initializer_list(const _E *__b, size_t __s) : __begin_(__b), __size_(__s) {}
-
-public:
- typedef _E value_type;
- typedef const _E &reference;
- typedef const _E &const_reference;
- typedef size_t size_type;
-
- typedef const _E *iterator;
- typedef const _E *const_iterator;
-
- initializer_list() : __begin_(nullptr), __size_(0) {}
-
- size_t size() const { return __size_; }
- const _E *begin() const { return __begin_; }
- const _E *end() const { return __begin_ + __size_; }
-};
-} // namespace std
-
-#if __cplusplus >= 201703L
-namespace test_rebuild {
-template <typename T, int> class C {
-public:
- C(std::initializer_list<T>);
-};
-
-template <typename T> using Ptr = __remove_pointer(T) *;
-template <typename T> C(T) -> C<Ptr<T>, sizeof(T)>;
-
-class A {
-public:
- template <typename T1, typename T2> T1 *some_func(T2 &&);
-};
-
-struct B : A {
- // Test CXXDefaultInitExpr rebuild issue in
- // https://github.com/llvm/llvm-project/pull/87933
- int *ar = some_func<int>(C{some_func<int>(0)});
- B() {}
-};
-
-int TestBody_got;
-template <int> class Vector {
-public:
- Vector(std::initializer_list<int>);
-};
-template <typename... Ts> Vector(Ts...) -> Vector<sizeof...(Ts)>;
-class ProgramBuilder {
-public:
- template <typename T, typename ARGS> int *create(ARGS);
-};
-
-struct TypeTest : ProgramBuilder {
- int *str_f16 = create<int>(Vector{0});
- TypeTest() {}
-};
-class TypeTest_Element_Test : TypeTest {
- void TestBody();
-};
-void TypeTest_Element_Test::TestBody() {
- int *expect = str_f16;
- &TestBody_got != expect; // expected-warning {{inequality comparison result unused}}
-}
-} // namespace test_rebuild
-#endif // __cplusplus >= 201703L
-
#if __cplusplus >= 202002L
// This test ensures cleanup expressions are correctly produced
// in the presence of default member initializers.
diff --git a/clang/test/SemaCXX/eval-crashes.cpp b/clang/test/SemaCXX/eval-crashes.cpp
index a06f60f..017df97 100644
--- a/clang/test/SemaCXX/eval-crashes.cpp
+++ b/clang/test/SemaCXX/eval-crashes.cpp
@@ -25,9 +25,11 @@ namespace pr33140_0b {
}
namespace pr33140_2 {
- struct A { int &&r = 0; };
+ // FIXME: The declaration of 'b' below should lifetime-extend two int
+ // temporaries.
+ struct A { int &&r = 0; }; // expected-note 2{{initializing field 'r' with default member initializer}}
struct B { A x, y; };
- B b = {};
+ B b = {}; // expected-warning 2{{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported}}
}
namespace pr33140_3 {
diff --git a/clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp b/clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp
index 6273d9c..98bec18 100644
--- a/clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp
+++ b/clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -Wno-tautological-pointer-compare -fblocks -std=c++11 -verify %s
+// RUN: %clang_cc1 -fsyntax-only -Wno-tautological-pointer-compare -fblocks -std=c++11 -verify %s -fexperimental-new-constant-interpreter
void foo() {
int a;
diff --git a/clang/test/SemaObjCXX/arc-type-traits.mm b/clang/test/SemaObjCXX/arc-type-traits.mm
index 2d30ae4..25bc8b36 100644
--- a/clang/test/SemaObjCXX/arc-type-traits.mm
+++ b/clang/test/SemaObjCXX/arc-type-traits.mm
@@ -221,3 +221,12 @@ TRAIT_IS_TRUE(__is_trivially_relocatable, __unsafe_unretained id);
TRAIT_IS_TRUE(__is_trivially_relocatable, HasStrong);
TRAIT_IS_FALSE(__is_trivially_relocatable, HasWeak);
TRAIT_IS_TRUE(__is_trivially_relocatable, HasUnsafeUnretained);
+
+// __is_bitwise_cloneable
+TRAIT_IS_FALSE(__is_bitwise_cloneable, __strong id);
+TRAIT_IS_FALSE(__is_bitwise_cloneable, __weak id);
+TRAIT_IS_FALSE(__is_bitwise_cloneable, __autoreleasing id);
+TRAIT_IS_TRUE(__is_trivial, __unsafe_unretained id);
+TRAIT_IS_FALSE(__is_bitwise_cloneable, HasStrong);
+TRAIT_IS_FALSE(__is_bitwise_cloneable, HasWeak);
+TRAIT_IS_TRUE(__is_bitwise_cloneable, HasUnsafeUnretained);
diff --git a/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c b/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c
index 23f852e..ac61976 100644
--- a/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c
+++ b/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c
@@ -106,7 +106,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present' clause is not valid on 'loop' directive}}
#pragma acc loop auto present(Var)
for(;;);
- // expected-warning@+1{{OpenACC clause 'private' not yet implemented}}
#pragma acc loop auto private(Var)
for(;;);
// expected-error@+1{{OpenACC 'copyout' clause is not valid on 'loop' directive}}
@@ -246,7 +245,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present' clause is not valid on 'loop' directive}}
#pragma acc loop present(Var) auto
for(;;);
- // expected-warning@+1{{OpenACC clause 'private' not yet implemented}}
#pragma acc loop private(Var) auto
for(;;);
// expected-error@+1{{OpenACC 'copyout' clause is not valid on 'loop' directive}}
@@ -387,7 +385,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present' clause is not valid on 'loop' directive}}
#pragma acc loop independent present(Var)
for(;;);
- // expected-warning@+1{{OpenACC clause 'private' not yet implemented}}
#pragma acc loop independent private(Var)
for(;;);
// expected-error@+1{{OpenACC 'copyout' clause is not valid on 'loop' directive}}
@@ -527,7 +524,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present' clause is not valid on 'loop' directive}}
#pragma acc loop present(Var) independent
for(;;);
- // expected-warning@+1{{OpenACC clause 'private' not yet implemented}}
#pragma acc loop private(Var) independent
for(;;);
// expected-error@+1{{OpenACC 'copyout' clause is not valid on 'loop' directive}}
@@ -677,7 +673,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present' clause is not valid on 'loop' directive}}
#pragma acc loop seq present(Var)
for(;;);
- // expected-warning@+1{{OpenACC clause 'private' not yet implemented}}
#pragma acc loop seq private(Var)
for(;;);
// expected-error@+1{{OpenACC 'copyout' clause is not valid on 'loop' directive}}
@@ -826,7 +821,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present' clause is not valid on 'loop' directive}}
#pragma acc loop present(Var) seq
for(;;);
- // expected-warning@+1{{OpenACC clause 'private' not yet implemented}}
#pragma acc loop private(Var) seq
for(;;);
// expected-error@+1{{OpenACC 'copyout' clause is not valid on 'loop' directive}}
diff --git a/clang/test/SemaOpenACC/loop-construct-private-clause.c b/clang/test/SemaOpenACC/loop-construct-private-clause.c
new file mode 100644
index 0000000..f3ffdfb
--- /dev/null
+++ b/clang/test/SemaOpenACC/loop-construct-private-clause.c
@@ -0,0 +1,132 @@
+// RUN: %clang_cc1 %s -fopenacc -verify
+
+struct Incomplete;
+enum SomeE{ A };
+typedef struct IsComplete {
+ struct S { int A; } CompositeMember;
+ int ScalarMember;
+ float ArrayMember[5];
+ enum SomeE EnumMember;
+ void *PointerMember;
+} Complete;
+
+int GlobalInt;
+float GlobalArray[5];
+short *GlobalPointer;
+Complete GlobalComposite;
+
+void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete CompositeParam) {
+ int LocalInt;
+ short *LocalPointer;
+ float LocalArray[5];
+ Complete LocalComposite;
+
+ // Check Appertainment:
+#pragma acc loop private(LocalInt)
+ for(;;);
+
+ // Valid cases:
+#pragma acc loop private(LocalInt, LocalPointer, LocalArray)
+ for(;;);
+#pragma acc loop private(LocalArray)
+ for(;;);
+#pragma acc loop private(LocalArray[:])
+ for(;;);
+#pragma acc loop private(LocalArray[:5])
+ for(;;);
+#pragma acc loop private(LocalArray[2:])
+ for(;;);
+#pragma acc loop private(LocalArray[2:1])
+ for(;;);
+#pragma acc loop private(LocalArray[2])
+ for(;;);
+#pragma acc loop private(LocalComposite)
+ for(;;);
+#pragma acc loop private(LocalComposite.EnumMember)
+ for(;;);
+#pragma acc loop private(LocalComposite.ScalarMember)
+ for(;;);
+#pragma acc loop private(LocalComposite.ArrayMember)
+ for(;;);
+#pragma acc loop private(LocalComposite.ArrayMember[5])
+ for(;;);
+#pragma acc loop private(LocalComposite.PointerMember)
+ for(;;);
+#pragma acc loop private(GlobalInt, GlobalArray, GlobalPointer, GlobalComposite)
+ for(;;);
+#pragma acc loop private(GlobalArray[2], GlobalPointer[2], GlobalComposite.CompositeMember.A)
+ for(;;);
+#pragma acc loop private(LocalComposite, GlobalComposite)
+ for(;;);
+#pragma acc loop private(IntParam, PointerParam, ArrayParam, CompositeParam)
+ for(;;);
+#pragma acc loop private(PointerParam[IntParam], ArrayParam[IntParam], CompositeParam.CompositeMember.A)
+ for(;;);
+
+#pragma acc loop private(LocalArray) private(LocalArray[2])
+ for(;;);
+
+#pragma acc loop private(LocalArray, LocalArray[2])
+ for(;;);
+
+#pragma acc loop private(LocalComposite, LocalComposite.ScalarMember)
+ for(;;);
+
+#pragma acc loop private(LocalComposite.CompositeMember.A, LocalComposite.ScalarMember)
+ for(;;);
+
+#pragma acc loop private(LocalComposite.CompositeMember.A) private(LocalComposite.ScalarMember)
+ for(;;);
+
+ Complete LocalComposite2;
+#pragma acc loop private(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember)
+ for(;;);
+
+ // Invalid cases, arbitrary expressions.
+ struct Incomplete *I;
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(*I)
+ for(;;);
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(GlobalInt + IntParam)
+ for(;;);
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(+GlobalInt)
+ for(;;);
+
+ // expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is not an array}}
+#pragma acc loop private(PointerParam[:])
+ for(;;);
+#pragma acc loop private(PointerParam[:5])
+ for(;;);
+#pragma acc loop private(PointerParam[:IntParam])
+ for(;;);
+ // expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is not an array}}
+#pragma acc loop private(PointerParam[2:])
+ for(;;);
+#pragma acc loop private(PointerParam[2:5])
+ for(;;);
+#pragma acc loop private(PointerParam[2])
+ for(;;);
+#pragma acc loop private(ArrayParam[:])
+ for(;;);
+#pragma acc loop private(ArrayParam[:5])
+ for(;;);
+#pragma acc loop private(ArrayParam[:IntParam])
+ for(;;);
+#pragma acc loop private(ArrayParam[2:])
+ for(;;);
+ // expected-error@+1{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
+#pragma acc loop private(ArrayParam[2:5])
+ for(;;);
+#pragma acc loop private(ArrayParam[2])
+ for(;;);
+
+ // expected-error@+2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private((float*)ArrayParam[2:5])
+ for(;;);
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private((float)ArrayParam[2])
+ for(;;);
+}
diff --git a/clang/test/SemaOpenACC/loop-construct-private-clause.cpp b/clang/test/SemaOpenACC/loop-construct-private-clause.cpp
new file mode 100644
index 0000000..b5d3fc9
--- /dev/null
+++ b/clang/test/SemaOpenACC/loop-construct-private-clause.cpp
@@ -0,0 +1,155 @@
+// RUN: %clang_cc1 %s -fopenacc -verify
+
+struct Incomplete;
+enum SomeE{};
+typedef struct IsComplete {
+ struct S { int A; } CompositeMember;
+ int ScalarMember;
+ float ArrayMember[5];
+ SomeE EnumMember;
+ char *PointerMember;
+} Complete;
+
+int GlobalInt;
+float GlobalArray[5];
+char *GlobalPointer;
+Complete GlobalComposite;
+
+void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete CompositeParam, int &IntParamRef) {
+ int LocalInt;
+ char *LocalPointer;
+ float LocalArray[5];
+ Complete LocalComposite;
+
+ // Check Appertainment:
+
+#pragma acc loop private(LocalInt)
+ for(;;);
+
+ // Valid cases:
+#pragma acc loop private(LocalInt, LocalPointer, LocalArray)
+ for(;;);
+#pragma acc loop private(LocalArray)
+ for(;;);
+#pragma acc loop private(LocalArray[2])
+ for(;;);
+#pragma acc loop private(LocalComposite)
+ for(;;);
+#pragma acc loop private(LocalComposite.EnumMember)
+ for(;;);
+#pragma acc loop private(LocalComposite.ScalarMember)
+ for(;;);
+#pragma acc loop private(LocalComposite.ArrayMember)
+ for(;;);
+#pragma acc loop private(LocalComposite.ArrayMember[5])
+ for(;;);
+#pragma acc loop private(LocalComposite.PointerMember)
+ for(;;);
+#pragma acc loop private(GlobalInt, GlobalArray, GlobalPointer, GlobalComposite)
+ for(;;);
+#pragma acc loop private(GlobalArray[2], GlobalPointer[2], GlobalComposite.CompositeMember.A)
+ for(;;);
+#pragma acc loop private(LocalComposite, GlobalComposite)
+ for(;;);
+#pragma acc loop private(IntParam, PointerParam, ArrayParam, CompositeParam) private(IntParamRef)
+ for(;;);
+#pragma acc loop private(PointerParam[IntParam], ArrayParam[IntParam], CompositeParam.CompositeMember.A)
+ for(;;);
+
+
+ // Invalid cases, arbitrary expressions.
+ Incomplete *I;
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(*I)
+ for(;;);
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(GlobalInt + IntParam)
+ for(;;);
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(+GlobalInt)
+ for(;;);
+}
+
+template<typename T, unsigned I, typename V>
+void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(+t)
+ for(;;);
+
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(+I)
+ for(;;);
+
+ // NTTP's are only valid if it is a reference to something.
+ // expected-error@+2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+ // expected-note@#TEMPL_USES_INST{{in instantiation of}}
+#pragma acc loop private(I)
+ for(;;);
+
+ // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+#pragma acc loop private(t, I)
+ for(;;);
+
+#pragma acc loop private(arrayT)
+ for(;;);
+
+#pragma acc loop private(TemplComp)
+ for(;;);
+
+#pragma acc loop private(TemplComp.PointerMember[5])
+ for(;;);
+
+#pragma acc loop private(TemplComp.PointerMember[5]) private(TemplComp)
+ for(;;);
+
+ int *Pointer;
+#pragma acc loop private(Pointer[:I])
+ for(;;);
+#pragma acc loop private(Pointer[:t])
+ for(;;);
+ // expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is not an array}}
+#pragma acc loop private(Pointer[1:])
+ for(;;);
+}
+
+template<unsigned I, auto &NTTP_REF>
+void NTTP() {
+ // NTTP's are only valid if it is a reference to something.
+ // expected-error@+2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
+ // expected-note@#NTTP_INST{{in instantiation of}}
+#pragma acc loop private(I)
+ for(;;);
+
+#pragma acc loop private(NTTP_REF)
+ for(;;);
+}
+
+struct S {
+ int ThisMember;
+ int ThisMemberArray[5];
+
+ void foo();
+};
+
+void S::foo() {
+#pragma acc loop private(ThisMember, this->ThisMemberArray[1])
+ for(;;);
+
+#pragma acc loop private(ThisMemberArray[1:2])
+ for(;;);
+
+#pragma acc loop private(this)
+ for(;;);
+
+#pragma acc loop private(ThisMember, this->ThisMember)
+ for(;;);
+}
+
+void Inst() {
+ static constexpr int NTTP_REFed = 1;
+ int i;
+ int Arr[5];
+ Complete C;
+ TemplUses(i, Arr, C); // #TEMPL_USES_INST
+ NTTP<5, NTTP_REFed>(); // #NTTP_INST
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
index 487cc53..2a1ba43 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
@@ -3,8 +3,10 @@
typedef unsigned int u32;
-void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32 size) {
- __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{expression is not an integer constant expression}}
+void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32 size, u32 offset, u32 aux) {
+ __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, offset, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, /*offset=*/0, aux); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
diff --git a/clang/unittests/AST/Interp/toAPValue.cpp b/clang/unittests/AST/Interp/toAPValue.cpp
index e56453a..d6879d6 100644
--- a/clang/unittests/AST/Interp/toAPValue.cpp
+++ b/clang/unittests/AST/Interp/toAPValue.cpp
@@ -186,3 +186,49 @@ TEST(ToAPValue, FunctionPointersC) {
ASSERT_EQ(I, 17);
}
}
+
+TEST(ToAPValue, MemberPointers) {
+ constexpr char Code[] = "struct S {\n"
+ " int m, n;\n"
+ "};\n"
+ "constexpr int S::*pm = &S::m;\n"
+ "constexpr int S::*nn = nullptr;\n";
+
+ auto AST = tooling::buildASTFromCodeWithArgs(
+ Code, {"-fexperimental-new-constant-interpreter"});
+
+ auto &Ctx = AST->getASTContext().getInterpContext();
+ Program &Prog = Ctx.getProgram();
+
+ auto getDecl = [&](const char *Name) -> const ValueDecl * {
+ auto Nodes =
+ match(valueDecl(hasName(Name)).bind("var"), AST->getASTContext());
+ assert(Nodes.size() == 1);
+ const auto *D = Nodes[0].getNodeAs<ValueDecl>("var");
+ assert(D);
+ return D;
+ };
+
+ auto getGlobalPtr = [&](const char *Name) -> Pointer {
+ const VarDecl *D = cast<VarDecl>(getDecl(Name));
+ return Prog.getPtrGlobal(*Prog.getGlobal(D));
+ };
+
+ {
+ const Pointer &GP = getGlobalPtr("pm");
+ ASSERT_TRUE(GP.isLive());
+ const MemberPointer &FP = GP.deref<MemberPointer>();
+ APValue A = FP.toAPValue();
+ ASSERT_EQ(A.getMemberPointerDecl(), getDecl("m"));
+ ASSERT_EQ(A.getKind(), APValue::MemberPointer);
+ }
+
+ {
+ const Pointer &GP = getGlobalPtr("nn");
+ ASSERT_TRUE(GP.isLive());
+ const MemberPointer &NP = GP.deref<MemberPointer>();
+ ASSERT_TRUE(NP.isZero());
+ APValue A = NP.toAPValue();
+ ASSERT_EQ(A.getKind(), APValue::MemberPointer);
+ }
+}
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 004ecb6..4e42726 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -24879,7 +24879,7 @@ TEST_F(FormatTest, SkipMacroDefinitionBody) {
Style);
// With comments.
- verifyFormat("/* */ #define A a // a a", "/* */ # define A a // a a",
+ verifyFormat("/* */ #define A a // a a", "/* */ # define A a // a a",
Style);
verifyNoChange("/* */ #define A a // a a", Style);
@@ -24891,6 +24891,15 @@ TEST_F(FormatTest, SkipMacroDefinitionBody) {
"int aaa; // a",
Style);
+ verifyNoChange(
+ "#define MACRO_WITH_COMMENTS() \\\n"
+ " public: \\\n"
+ " /* Documentation parsed by Doxygen for the following method. */ \\\n"
+ " static MyType getClassTypeId(); \\\n"
+ " /** Normal comment for the following method. */ \\\n"
+ " virtual MyType getTypeId() const;",
+ Style);
+
// multiline macro definitions
verifyNoChange("#define A a\\\n"
" A a \\\n "
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index 044c3d6..59fef9e 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/DependencyDirectivesScanner.h"
-#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/SmallString.h"
#include "gtest/gtest.h"
@@ -18,11 +17,11 @@ using namespace clang::dependency_directives_scan;
static bool minimizeSourceToDependencyDirectives(
StringRef Input, SmallVectorImpl<char> &Out,
SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
- SmallVectorImpl<Directive> &Directives, const LangOptions &LangOpts) {
+ SmallVectorImpl<Directive> &Directives) {
Out.clear();
Tokens.clear();
Directives.clear();
- if (scanSourceForDependencyDirectives(Input, Tokens, Directives, LangOpts))
+ if (scanSourceForDependencyDirectives(Input, Tokens, Directives))
return true;
raw_svector_ostream OS(Out);
@@ -39,9 +38,7 @@ static bool minimizeSourceToDependencyDirectives(StringRef Input,
SmallVectorImpl<char> &Out) {
SmallVector<dependency_directives_scan::Token, 16> Tokens;
SmallVector<Directive, 32> Directives;
- LangOptions LangOpts;
- return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives,
- LangOpts);
+ return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives);
}
namespace {
@@ -50,17 +47,16 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
- LangOptions LangOpts;
- ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens, Directives,
- LangOpts));
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives("", Out, Tokens, Directives));
EXPECT_TRUE(Out.empty());
EXPECT_TRUE(Tokens.empty());
ASSERT_EQ(1u, Directives.size());
ASSERT_EQ(pp_eof, Directives.back().Kind);
ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens,
- Directives, LangOpts));
+ Directives));
EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
EXPECT_TRUE(Tokens.empty());
ASSERT_EQ(2u, Directives.size());
@@ -72,7 +68,6 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
- LangOptions LangOpts;
ASSERT_FALSE(
minimizeSourceToDependencyDirectives("#define A\n"
@@ -97,7 +92,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
"export module m;\n"
"import m;\n"
"#pragma clang system_header\n",
- Out, Tokens, Directives, LangOpts));
+ Out, Tokens, Directives));
EXPECT_EQ(pp_define, Directives[0].Kind);
EXPECT_EQ(pp_undef, Directives[1].Kind);
EXPECT_EQ(pp_endif, Directives[2].Kind);
@@ -150,10 +145,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
- LangOptions LangOpts;
- ASSERT_FALSE(minimizeSourceToDependencyDirectives(
- "#define MACRO", Out, Tokens, Directives, LangOpts));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out,
+ Tokens, Directives));
EXPECT_STREQ("#define MACRO\n", Out.data());
ASSERT_EQ(4u, Tokens.size());
ASSERT_EQ(2u, Directives.size());
@@ -844,7 +838,6 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
- LangOptions LangOpts;
StringRef Source = R"(// comment
#pragma once
@@ -852,8 +845,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
#include <test.h>
_Pragma("once")
)";
- ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
- Directives, LangOpts));
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
EXPECT_STREQ("#pragma once\n#include <test.h>\n_Pragma(\"once\")\n",
Out.data());
ASSERT_EQ(Directives.size(), 4u);
@@ -933,7 +926,6 @@ TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
- LangOptions LangOpts;
StringRef Source = R"(
module;
@@ -962,8 +954,8 @@ ort \
import f(->a = 3);
}
)";
- ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
- Directives, LangOpts));
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;"
"exp\\\nort import:l[[rename]];"
"import<<=3;import a b d e d e f e;"
@@ -1020,52 +1012,4 @@ TEST(MinimizeSourceToDependencyDirectivesTest, TokensBeforeEOF) {
EXPECT_STREQ("#ifndef A\n#define A\n#endif\n<TokBeforeEOF>\n", Out.data());
}
-TEST(MinimizeSourceToDependencyDirectivesTest, CPlusPlus14PPNumber) {
- SmallVector<char, 128> Out;
- SmallVector<dependency_directives_scan::Token, 4> Tokens;
- SmallVector<Directive, 4> Directives;
- LangOptions LangOpts;
-
- StringRef Source = R"(
-#if 123'124
-#endif
-)";
-
- LangOpts.CPlusPlus14 = true;
- ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
- Directives, LangOpts));
- EXPECT_STREQ("#if 123'124\n#endif\n", Out.data());
- ASSERT_EQ(Directives.size(), 3u);
- EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_if);
- EXPECT_EQ(Directives[1].Kind, dependency_directives_scan::pp_endif);
- EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::pp_eof);
- ASSERT_EQ(Tokens.size(), 7u);
-
- ASSERT_TRUE(Tokens[0].is(tok::hash));
- ASSERT_TRUE(Tokens[1].is(tok::raw_identifier)); // "if"
- ASSERT_TRUE(Tokens[2].is(tok::numeric_constant)); // 123'124
- ASSERT_TRUE(Tokens[3].is(tok::eod));
- ASSERT_TRUE(Tokens[4].is(tok::hash));
- ASSERT_TRUE(Tokens[5].is(tok::raw_identifier)); // #endif
- ASSERT_TRUE(Tokens[6].is(tok::eod));
-
- LangOpts.CPlusPlus14 = false;
- ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
- Directives, LangOpts));
- EXPECT_STREQ("#if 123'124\n#endif\n", Out.data());
- ASSERT_EQ(Directives.size(), 3u);
- EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_if);
- EXPECT_EQ(Directives[1].Kind, dependency_directives_scan::pp_endif);
- EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::pp_eof);
- ASSERT_EQ(Tokens.size(), 8u);
- ASSERT_TRUE(Tokens[0].is(tok::hash));
- ASSERT_TRUE(Tokens[1].is(tok::raw_identifier)); // "if"
- ASSERT_TRUE(Tokens[2].is(tok::numeric_constant)); // 123
- ASSERT_TRUE(Tokens[3].is(tok::unknown)); // '124
- ASSERT_TRUE(Tokens[4].is(tok::eod));
- ASSERT_TRUE(Tokens[5].is(tok::hash));
- ASSERT_TRUE(Tokens[6].is(tok::raw_identifier)); // #endif
- ASSERT_TRUE(Tokens[7].is(tok::eod));
-}
-
} // end anonymous namespace
diff --git a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
index 410f378..6ff87f7 100644
--- a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
+++ b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
@@ -104,7 +104,6 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) {
SmallVector<dependency_directives_scan::Directive> Directives;
};
SmallVector<std::unique_ptr<DepDirectives>> DepDirectivesObjects;
- LangOptions LangOpts;
auto getDependencyDirectives = [&](FileEntryRef File)
-> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
@@ -112,7 +111,7 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) {
StringRef Input = (*FileMgr.getBufferForFile(File))->getBuffer();
bool Err = scanSourceForDependencyDirectives(
Input, DepDirectivesObjects.back()->Tokens,
- DepDirectivesObjects.back()->Directives, LangOpts);
+ DepDirectivesObjects.back()->Directives);
EXPECT_FALSE(Err);
return llvm::ArrayRef(DepDirectivesObjects.back()->Directives);
};
diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html
index b046468..4385744 100755
--- a/clang/www/cxx_dr_status.html
+++ b/clang/www/cxx_dr_status.html
@@ -10698,7 +10698,7 @@ and <I>POD class</I></td>
<td><a href="https://cplusplus.github.io/CWG/issues/1815.html">1815</a></td>
<td>CD4</td>
<td>Lifetime extension in aggregate initialization</td>
- <td class="unreleased" align="center">Clang 19</td>
+ <td class="none" align="center">No</td>
</tr>
<tr id="1816">
<td><a href="https://cplusplus.github.io/CWG/issues/1816.html">1816</a></td>
diff --git a/compiler-rt/lib/builtins/atomic.c b/compiler-rt/lib/builtins/atomic.c
index c3a36a9..852bb20 100644
--- a/compiler-rt/lib/builtins/atomic.c
+++ b/compiler-rt/lib/builtins/atomic.c
@@ -51,14 +51,6 @@
#endif
static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
-#ifndef CACHE_LINE_SIZE
-#define CACHE_LINE_SIZE 64
-#endif
-
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wgnu-designator"
-#endif
-
////////////////////////////////////////////////////////////////////////////////
// Platform-specific lock implementation. Falls back to spinlocks if none is
// defined. Each platform should define the Lock type, and corresponding
@@ -102,18 +94,21 @@ static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0
#else
_Static_assert(__atomic_always_lock_free(sizeof(uintptr_t), 0),
"Implementation assumes lock-free pointer-size cmpxchg");
-#include <pthread.h>
-#include <stdalign.h>
-typedef struct {
- alignas(CACHE_LINE_SIZE) pthread_mutex_t m;
-} Lock;
+typedef _Atomic(uintptr_t) Lock;
/// Unlock a lock. This is a release operation.
-__inline static void unlock(Lock *l) { pthread_mutex_unlock(&l->m); }
-/// Locks a lock.
-__inline static void lock(Lock *l) { pthread_mutex_lock(&l->m); }
+__inline static void unlock(Lock *l) {
+ __c11_atomic_store(l, 0, __ATOMIC_RELEASE);
+}
+/// Locks a lock. In the current implementation, this is potentially
+/// unbounded in the contended case.
+__inline static void lock(Lock *l) {
+ uintptr_t old = 0;
+ while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
+ __ATOMIC_RELAXED))
+ old = 0;
+}
/// locks for atomic operations
-static Lock locks[SPINLOCK_COUNT] = {
- [0 ... SPINLOCK_COUNT - 1] = {PTHREAD_MUTEX_INITIALIZER}};
+static Lock locks[SPINLOCK_COUNT];
#endif
/// Returns a lock to use for a given pointer.
diff --git a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp
index c0168ab..8c5c5da 100644
--- a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp
+++ b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp
@@ -18,7 +18,7 @@ llvm::MutableArrayRef<int> MutableArrayRef(Array);
llvm::DenseMap<int, int> DenseMap = {{4, 5}, {6, 7}};
llvm::StringMap<int> StringMap = {{"foo", 123}, {"bar", 456}};
llvm::Expected<int> ExpectedValue(8);
-llvm::Expected<int> ExpectedError(llvm::createStringError({}, ""));
+llvm::Expected<int> ExpectedError(llvm::createStringError(""));
std::optional<int> OptionalValue(9);
std::optional<int> OptionalNone(std::nullopt);
llvm::SmallVector<int, 5> SmallVector = {10, 11, 12};
diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
index 0af12c8..6407be5 100644
--- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake
+++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
@@ -101,6 +101,7 @@ macro(enable_omp_offload_compilation files)
"gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
"gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
"gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
+ "gfx1152"
)
set(all_nvptx_architectures
"sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index 41129b1..8853d4d 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -967,4 +967,35 @@ program test_etime
print *, tarray(1)
print *, tarray(2)
end program test_etime
+```
+
+### Non-Standard Intrinsics: GETCWD
+
+#### Description
+`GETCWD(C, STATUS)` returns current working directory.
+
+This intrinsic is provided in both subroutine and function forms; however, only one form can be used in any given program unit.
+
+*C* and *STATUS* are `INTENT(OUT)` and provide the following:
+
+| | |
+|------------|---------------------------------------------------------------------------------------------------|
+| `C` | Current work directory. The type shall be `CHARACTER` and of default kind. |
+| `STATUS` | (Optional) Status flag. Returns 0 on success, a system specific and nonzero error code otherwise. The type shall be `INTEGER` and of a kind greater or equal to 4. |
+
+#### Usage and Info
+
+- **Standard:** GNU extension
+- **Class:** Subroutine, function
+- **Syntax:** `CALL GETCWD(C, STATUS)`, `STATUS = GETCWD(C)`
+
+#### Example
+```Fortran
+PROGRAM example_getcwd
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ CALL getcwd(cwd, status)
+ PRINT *, cwd
+ PRINT *, status
+END PROGRAM
``` \ No newline at end of file
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index c47e41e..8ef5d59 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -232,6 +232,8 @@ struct IntrinsicLibrary {
mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genFraction(mlir::Type resultType,
mlir::ArrayRef<mlir::Value> args);
+ fir::ExtendedValue genGetCwd(std::optional<mlir::Type> resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args);
void genGetCommand(mlir::ArrayRef<fir::ExtendedValue> args);
mlir::Value genGetPID(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Command.h b/flang/include/flang/Optimizer/Builder/Runtime/Command.h
index 976fb3a..0d60a36 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Command.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Command.h
@@ -53,5 +53,10 @@ mlir::Value genGetEnvVariable(fir::FirOpBuilder &, mlir::Location,
mlir::Value length, mlir::Value trimName,
mlir::Value errmsg);
+/// Generate a call to the GetCwd runtime function which implements
+/// the GETCWD intrinsic.
+mlir::Value genGetCwd(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value c);
+
} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COMMAND_H
diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
index 0c34b64..aedb676 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
@@ -67,6 +67,36 @@ def fir_BoxFieldAttr : I32EnumAttr<
let cppNamespace = "fir";
}
+def fir_ReduceOperationEnum : I32BitEnumAttr<"ReduceOperationEnum",
+ "intrinsic operations and functions supported by DO CONCURRENT REDUCE",
+ [
+ I32BitEnumAttrCaseBit<"Add", 0, "add">,
+ I32BitEnumAttrCaseBit<"Multiply", 1, "multiply">,
+ I32BitEnumAttrCaseBit<"AND", 2, "and">,
+ I32BitEnumAttrCaseBit<"OR", 3, "or">,
+ I32BitEnumAttrCaseBit<"EQV", 4, "eqv">,
+ I32BitEnumAttrCaseBit<"NEQV", 5, "neqv">,
+ I32BitEnumAttrCaseBit<"MAX", 6, "max">,
+ I32BitEnumAttrCaseBit<"MIN", 7, "min">,
+ I32BitEnumAttrCaseBit<"IAND", 8, "iand">,
+ I32BitEnumAttrCaseBit<"IOR", 9, "ior">,
+ I32BitEnumAttrCaseBit<"EIOR", 10, "eior">
+ ]> {
+ let separator = ", ";
+ let cppNamespace = "::fir";
+ let printBitEnumPrimaryGroups = 1;
+}
+
+def fir_ReduceAttr : fir_Attr<"Reduce"> {
+ let mnemonic = "reduce_attr";
+
+ let parameters = (ins
+ "ReduceOperationEnum":$reduce_operation
+ );
+
+ let assemblyFormat = "`<` $reduce_operation `>`";
+}
+
// mlir::SideEffects::Resource for modelling operations which add debugging information
def DebuggingResource : Resource<"::fir::DebuggingResource">;
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 37fbd1f..e7da3af 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2125,8 +2125,8 @@ class region_Op<string mnemonic, list<Trait> traits = []> :
let hasVerifier = 1;
}
-def fir_DoLoopOp : region_Op<"do_loop",
- [DeclareOpInterfaceMethods<LoopLikeOpInterface,
+def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
+ DeclareOpInterfaceMethods<LoopLikeOpInterface,
["getYieldedValuesMutable"]>]> {
let summary = "generalized loop operation";
let description = [{
@@ -2156,9 +2156,11 @@ def fir_DoLoopOp : region_Op<"do_loop",
Index:$lowerBound,
Index:$upperBound,
Index:$step,
+ Variadic<AnyType>:$reduceOperands,
Variadic<AnyType>:$initArgs,
OptionalAttr<UnitAttr>:$unordered,
- OptionalAttr<UnitAttr>:$finalValue
+ OptionalAttr<UnitAttr>:$finalValue,
+ OptionalAttr<ArrayAttr>:$reduceAttrs
);
let results = (outs Variadic<AnyType>:$results);
let regions = (region SizedRegion<1>:$region);
@@ -2169,6 +2171,8 @@ def fir_DoLoopOp : region_Op<"do_loop",
"mlir::Value":$step, CArg<"bool", "false">:$unordered,
CArg<"bool", "false">:$finalCountValue,
CArg<"mlir::ValueRange", "std::nullopt">:$iterArgs,
+ CArg<"mlir::ValueRange", "std::nullopt">:$reduceOperands,
+ CArg<"llvm::ArrayRef<mlir::Attribute>", "{}">:$reduceAttrs,
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>
];
@@ -2181,11 +2185,12 @@ def fir_DoLoopOp : region_Op<"do_loop",
return getBody()->getArguments().drop_front();
}
mlir::Operation::operand_range getIterOperands() {
- return getOperands().drop_front(getNumControlOperands());
+ return getOperands()
+ .drop_front(getNumControlOperands() + getNumReduceOperands());
}
llvm::MutableArrayRef<mlir::OpOperand> getInitsMutable() {
- return
- getOperation()->getOpOperands().drop_front(getNumControlOperands());
+ return getOperation()->getOpOperands()
+ .drop_front(getNumControlOperands() + getNumReduceOperands());
}
void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); }
@@ -2200,11 +2205,25 @@ def fir_DoLoopOp : region_Op<"do_loop",
unsigned getNumControlOperands() { return 3; }
/// Does the operation hold operands for loop-carried values
bool hasIterOperands() {
- return (*this)->getNumOperands() > getNumControlOperands();
+ return getNumIterOperands() > 0;
+ }
+ /// Does the operation hold operands for reduction variables
+ bool hasReduceOperands() {
+ return getNumReduceOperands() > 0;
+ }
+ /// Get Number of variadic operands
+ unsigned getNumOperands(unsigned idx) {
+ auto segments = (*this)->getAttrOfType<mlir::DenseI32ArrayAttr>(
+ getOperandSegmentSizeAttr());
+ return static_cast<unsigned>(segments[idx]);
+ }
+ // Get Number of reduction operands
+ unsigned getNumReduceOperands() {
+ return getNumOperands(3);
}
/// Get Number of loop-carried values
unsigned getNumIterOperands() {
- return (*this)->getNumOperands() - getNumControlOperands();
+ return getNumOperands(4);
}
/// Get the body of the loop
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index a7ba704..2d43f4d 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -60,9 +60,6 @@ std::unique_ptr<mlir::Pass> createAffineDemotionPass();
std::unique_ptr<mlir::Pass>
createArrayValueCopyPass(fir::ArrayValueCopyOptions options = {});
std::unique_ptr<mlir::Pass> createCFGConversionPassWithNSW();
-std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
-std::unique_ptr<mlir::Pass>
-createExternalNameConversionPass(bool appendUnderscore);
std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
std::unique_ptr<mlir::Pass>
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 8263820..cac590a8 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -163,7 +163,6 @@ def ExternalNameConversion : Pass<"external-name-interop", "mlir::ModuleOp"> {
let description = [{
Demangle FIR internal name and mangle them for external interoperability.
}];
- let constructor = "::fir::createExternalNameConversionPass()";
let options = [
Option<"appendUnderscoreOpt", "append-underscore",
"bool", /*default=*/"true",
diff --git a/flang/include/flang/Runtime/command.h b/flang/include/flang/Runtime/command.h
index c67d171..7ab3f64 100644
--- a/flang/include/flang/Runtime/command.h
+++ b/flang/include/flang/Runtime/command.h
@@ -55,6 +55,10 @@ std::int32_t RTNAME(GetEnvVariable)(const Descriptor &name,
const Descriptor *value = nullptr, const Descriptor *length = nullptr,
bool trim_name = true, const Descriptor *errmsg = nullptr,
const char *sourceFile = nullptr, int line = 0);
+
+// Calls getcwd()
+std::int32_t RTNAME(GetCwd)(
+ const Descriptor &cwd, const char *sourceFile, int line);
}
} // namespace Fortran::runtime
diff --git a/flang/include/flang/Runtime/magic-numbers.h b/flang/include/flang/Runtime/magic-numbers.h
index 38ccc5e..1cded1f 100644
--- a/flang/include/flang/Runtime/magic-numbers.h
+++ b/flang/include/flang/Runtime/magic-numbers.h
@@ -69,6 +69,11 @@ Additional status code for a bad pointer DEALLOCATE.
#define FORTRAN_RUNTIME_STAT_BAD_POINTER_DEALLOCATION 110
#if 0
+Status codes for GETCWD.
+#endif
+#define FORTRAN_RUNTIME_STAT_MISSING_CWD 111
+
+#if 0
ieee_class_type values
The sequence is that of F18 Clause 17.2p3, but nothing depends on that.
#endif
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index c5c35e9..d0399d6 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -233,9 +233,8 @@ inline void addBoxedProcedurePass(mlir::PassManager &pm) {
inline void addExternalNameConversionPass(
mlir::PassManager &pm, bool appendUnderscore = true) {
- addPassConditionally(pm, disableExternalNameConversion, [&]() {
- return fir::createExternalNameConversionPass(appendUnderscore);
- });
+ addPassConditionally(pm, disableExternalNameConversion,
+ [&]() { return fir::createExternalNameConversion({appendUnderscore}); });
}
// Use inliner extension point callback to register the default inliner pass.
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index 12d1342..58c1b69 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -514,6 +514,10 @@ static const IntrinsicInterface genericIntrinsicFunction[]{
{"gamma", {{"x", SameReal}}, SameReal},
{"get_team", {{"level", DefaultInt, Rank::scalar, Optionality::optional}},
TeamType, Rank::scalar, IntrinsicClass::transformationalFunction},
+ {"getcwd",
+ {{"c", DefaultChar, Rank::scalar, Optionality::required,
+ common::Intent::Out}},
+ TypePattern{IntType, KindCode::greaterOrEqualToKind, 4}},
{"getpid", {}, DefaultInt},
{"huge",
{{"x", SameIntOrReal, Rank::anyOrAssumedRank, Optionality::required,
@@ -1406,6 +1410,12 @@ static const IntrinsicInterface intrinsicSubroutine[]{
{"errmsg", DefaultChar, Rank::scalar, Optionality::optional,
common::Intent::InOut}},
{}, Rank::elemental, IntrinsicClass::impureSubroutine},
+ {"getcwd",
+ {{"c", DefaultChar, Rank::scalar, Optionality::required,
+ common::Intent::Out},
+ {"status", TypePattern{IntType, KindCode::greaterOrEqualToKind, 4},
+ Rank::scalar, Optionality::optional, common::Intent::Out}},
+ {}, Rank::elemental, IntrinsicClass::impureSubroutine},
{"move_alloc",
{{"from", SameType, Rank::known, Optionality::required,
common::Intent::InOut},
@@ -2574,7 +2584,7 @@ bool IntrinsicProcTable::Implementation::IsDualIntrinsic(
const std::string &name) const {
// Collection for some intrinsics with function and subroutine form,
// in order to pass the semantic check.
- static const std::string dualIntrinsic[]{{"etime"}};
+ static const std::string dualIntrinsic[]{{"etime"}, {"getcwd"}};
return std::find_if(std::begin(dualIntrinsic), std::end(dualIntrinsic),
[&name](const std::string &dualName) {
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 68619f6..d289f2f 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -175,7 +175,7 @@ static void addUseDeviceClause(
useDeviceLocs.push_back(operand.getLoc());
}
for (const omp::Object &object : objects)
- useDeviceSyms.push_back(object.id());
+ useDeviceSyms.push_back(object.sym());
}
static void convertLoopBounds(lower::AbstractConverter &converter,
@@ -525,7 +525,7 @@ bool ClauseProcessor::processCopyin() const {
bool hasCopyin = findRepeatableClause<omp::clause::Copyin>(
[&](const omp::clause::Copyin &clause, const parser::CharBlock &) {
for (const omp::Object &object : clause.v) {
- semantics::Symbol *sym = object.id();
+ semantics::Symbol *sym = object.sym();
assert(sym && "Expecting symbol");
if (const auto *commonDetails =
sym->detailsIf<semantics::CommonBlockDetails>()) {
@@ -698,7 +698,7 @@ bool ClauseProcessor::processCopyprivate(
bool hasCopyPrivate = findRepeatableClause<clause::Copyprivate>(
[&](const clause::Copyprivate &clause, const parser::CharBlock &) {
for (const Object &object : clause.v) {
- semantics::Symbol *sym = object.id();
+ semantics::Symbol *sym = object.sym();
if (const auto *commonDetails =
sym->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &mem : commonDetails->objects())
@@ -739,7 +739,7 @@ bool ClauseProcessor::processDepend(mlir::omp::DependClauseOps &result) const {
"array sections not supported for task depend");
}
- semantics::Symbol *sym = object.id();
+ semantics::Symbol *sym = object.sym();
const mlir::Value variable = converter.getSymbolAddress(*sym);
result.dependVars.push_back(variable);
}
@@ -870,11 +870,11 @@ bool ClauseProcessor::processMap(
lower::AddrAndBoundsInfo info =
lower::gatherDataOperandAddrAndBounds<mlir::omp::MapBoundsOp,
mlir::omp::MapBoundsType>(
- converter, firOpBuilder, semaCtx, stmtCtx, *object.id(),
+ converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
object.ref(), clauseLocation, asFortran, bounds,
treatIndexAsSection);
- auto origSymbol = converter.getSymbolAddress(*object.id());
+ auto origSymbol = converter.getSymbolAddress(*object.sym());
mlir::Value symAddr = info.addr;
if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
symAddr = origSymbol;
@@ -894,12 +894,12 @@ bool ClauseProcessor::processMap(
mapTypeBits),
mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
- if (object.id()->owner().IsDerivedType()) {
+ if (object.sym()->owner().IsDerivedType()) {
addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
semaCtx);
} else {
result.mapVars.push_back(mapOp);
- ptrMapSyms->push_back(object.id());
+ ptrMapSyms->push_back(object.sym());
if (mapSymTypes)
mapSymTypes->push_back(symAddr.getType());
if (mapSymLocs)
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 4d3d444..28f2669 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -205,11 +205,11 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx,
lower::AddrAndBoundsInfo info =
lower::gatherDataOperandAddrAndBounds<mlir::omp::MapBoundsOp,
mlir::omp::MapBoundsType>(
- converter, firOpBuilder, semaCtx, stmtCtx, *object.id(),
+ converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
object.ref(), clauseLocation, asFortran, bounds,
treatIndexAsSection);
- auto origSymbol = converter.getSymbolAddress(*object.id());
+ auto origSymbol = converter.getSymbolAddress(*object.sym());
mlir::Value symAddr = info.addr;
if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
symAddr = origSymbol;
@@ -226,12 +226,12 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx,
mapTypeBits),
mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
- if (object.id()->owner().IsDerivedType()) {
+ if (object.sym()->owner().IsDerivedType()) {
addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
semaCtx);
} else {
result.mapVars.push_back(mapOp);
- mapSymbols.push_back(object.id());
+ mapSymbols.push_back(object.sym());
}
}
});
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
index 5391b13..98fb5dc 100644
--- a/flang/lib/Lower/OpenMP/Clauses.h
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -21,6 +21,10 @@
#include <type_traits>
#include <utility>
+namespace Fortran::semantics {
+class Symbol;
+}
+
namespace Fortran::lower::omp {
using namespace Fortran;
using SomeExpr = semantics::SomeExpr;
@@ -32,29 +36,64 @@ struct TypeTy : public evaluate::SomeType {
bool operator==(const TypeTy &t) const { return true; }
};
-using IdTy = semantics::Symbol *;
+template <typename ExprTy>
+struct IdTyTemplate {
+ // "symbol" is always non-null for id's of actual objects.
+ Fortran::semantics::Symbol *symbol;
+ std::optional<ExprTy> designator;
+
+ bool operator==(const IdTyTemplate &other) const {
+ // If symbols are different, then the objects are different.
+ if (symbol != other.symbol)
+ return false;
+ if (symbol == nullptr)
+ return true;
+ // Equal symbols don't necessarily indicate identical objects,
+ // for example, a derived object component may use a single symbol,
+ // which will refer to different objects for different designators,
+ // e.g. a%c and b%c.
+ return designator == other.designator;
+ }
+
+ operator bool() const { return symbol != nullptr; }
+};
+
using ExprTy = SomeExpr;
template <typename T>
using List = tomp::ListT<T>;
} // namespace Fortran::lower::omp
+// Specialization of the ObjectT template
namespace tomp::type {
template <>
-struct ObjectT<Fortran::lower::omp::IdTy, Fortran::lower::omp::ExprTy> {
- using IdTy = Fortran::lower::omp::IdTy;
+struct ObjectT<Fortran::lower::omp::IdTyTemplate<Fortran::lower::omp::ExprTy>,
+ Fortran::lower::omp::ExprTy> {
+ using IdTy = Fortran::lower::omp::IdTyTemplate<Fortran::lower::omp::ExprTy>;
using ExprTy = Fortran::lower::omp::ExprTy;
- const IdTy &id() const { return symbol; }
- const std::optional<ExprTy> &ref() const { return designator; }
+ IdTy id() const { return identity; }
+ Fortran::semantics::Symbol *sym() const { return identity.symbol; }
+ const std::optional<ExprTy> &ref() const { return identity.designator; }
- IdTy symbol;
- std::optional<ExprTy> designator;
+ IdTy identity;
};
} // namespace tomp::type
namespace Fortran::lower::omp {
+using IdTy = IdTyTemplate<ExprTy>;
+}
+
+namespace std {
+template <>
+struct hash<Fortran::lower::omp::IdTy> {
+ size_t operator()(const Fortran::lower::omp::IdTy &id) const {
+ return static_cast<size_t>(reinterpret_cast<uintptr_t>(id.symbol));
+ }
+};
+} // namespace std
+namespace Fortran::lower::omp {
using Object = tomp::ObjectT<IdTy, ExprTy>;
using ObjectList = tomp::ObjectListT<IdTy, ExprTy>;
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 557a968..b206040 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -139,7 +139,7 @@ void DataSharingProcessor::collectOmpObjectListSymbol(
const omp::ObjectList &objects,
llvm::SetVector<const semantics::Symbol *> &symbolSet) {
for (const omp::Object &object : objects)
- symbolSet.insert(object.id());
+ symbolSet.insert(object.sym());
}
void DataSharingProcessor::collectSymbolsForPrivatization() {
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 80a956d..fb340e6 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -44,7 +44,9 @@ private:
void Post(const T &) {}
bool Pre(const parser::OpenMPConstruct &omp) {
- currentConstruct = &omp;
+ // Skip constructs that may not have privatizations.
+ if (!std::holds_alternative<parser::OpenMPCriticalConstruct>(omp.u))
+ currentConstruct = &omp;
return true;
}
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index af9e2af..f84440d 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1434,7 +1434,7 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint();
const auto &objList = std::get<ObjectList>(lastp->t);
for (const Object &object : objList) {
- semantics::Symbol *sym = object.id();
+ semantics::Symbol *sym = object.sym();
converter.copyHostAssociateVar(*sym, &insp);
}
}
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 1a63e31..60e933f 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -37,7 +37,7 @@ namespace omp {
ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
const omp::clause::ProcedureDesignator &pd) {
auto redType = llvm::StringSwitch<std::optional<ReductionIdentifier>>(
- getRealName(pd.v.id()).ToString())
+ getRealName(pd.v.sym()).ToString())
.Case("max", ReductionIdentifier::MAX)
.Case("min", ReductionIdentifier::MIN)
.Case("iand", ReductionIdentifier::IAND)
@@ -72,7 +72,7 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
bool ReductionProcessor::supportedIntrinsicProcReduction(
const omp::clause::ProcedureDesignator &pd) {
- semantics::Symbol *sym = pd.v.id();
+ semantics::Symbol *sym = pd.v.sym();
if (!sym->GetUltimate().attrs().test(semantics::Attr::INTRINSIC))
return false;
auto redType = llvm::StringSwitch<bool>(getRealName(sym).ToString())
@@ -707,7 +707,7 @@ void ReductionProcessor::addDeclareReduction(
// should happen byref
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
for (const Object &object : objectList) {
- const semantics::Symbol *symbol = object.id();
+ const semantics::Symbol *symbol = object.sym();
if (reductionSymbols)
reductionSymbols->push_back(symbol);
mlir::Value symVal = converter.getSymbolAddress(*symbol);
@@ -825,7 +825,7 @@ ReductionProcessor::getRealName(const semantics::Symbol *symbol) {
const semantics::SourceName
ReductionProcessor::getRealName(const omp::clause::ProcedureDesignator &pd) {
- return getRealName(pd.v.id());
+ return getRealName(pd.v.sym());
}
int ReductionProcessor::getOperationIdentity(ReductionIdentifier redId,
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 4d665e6..da94352 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -55,7 +55,7 @@ void genObjectList(const ObjectList &objects,
lower::AbstractConverter &converter,
llvm::SmallVectorImpl<mlir::Value> &operands) {
for (const Object &object : objects) {
- const semantics::Symbol *sym = object.id();
+ const semantics::Symbol *sym = object.sym();
assert(sym && "Expected Symbol");
if (mlir::Value variable = converter.getSymbolAddress(*sym)) {
operands.push_back(variable);
@@ -107,7 +107,7 @@ void gatherFuncAndVarSyms(
const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
for (const Object &object : objects)
- symbolAndClause.emplace_back(clause, *object.id());
+ symbolAndClause.emplace_back(clause, *object.sym());
}
mlir::omp::MapInfoOp
@@ -175,7 +175,7 @@ generateMemberPlacementIndices(const Object &object,
semantics::SemanticsContext &semaCtx) {
auto compObj = getComponentObject(object, semaCtx);
while (compObj) {
- indices.push_back(getComponentPlacementInParent(compObj->id()));
+ indices.push_back(getComponentPlacementInParent(compObj->sym()));
compObj =
getComponentObject(getBaseObject(compObj.value(), semaCtx), semaCtx);
}
@@ -188,7 +188,7 @@ void addChildIndexAndMapToParent(
std::map<const semantics::Symbol *,
llvm::SmallVector<OmpMapMemberIndicesData>> &parentMemberIndices,
mlir::omp::MapInfoOp &mapOp, semantics::SemanticsContext &semaCtx) {
- std::optional<evaluate::DataRef> dataRef = ExtractDataRef(object.designator);
+ std::optional<evaluate::DataRef> dataRef = ExtractDataRef(object.ref());
assert(dataRef.has_value() &&
"DataRef could not be extracted during mapping of derived type "
"cannot proceed");
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 1cd3976..d3f6fa1 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -280,6 +280,10 @@ static constexpr IntrinsicHandler handlers[]{
{"trim_name", asAddr, handleDynamicOptional},
{"errmsg", asBox, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"getcwd",
+ &I::genGetCwd,
+ {{{"c", asBox}, {"status", asAddr, handleDynamicOptional}}},
+ /*isElemental=*/false},
{"getpid", &I::genGetPID},
{"iachar", &I::genIchar},
{"iall",
@@ -3476,6 +3480,37 @@ mlir::Value IntrinsicLibrary::genFraction(mlir::Type resultType,
fir::runtime::genFraction(builder, loc, fir::getBase(args[0])));
}
+// GETCWD
+fir::ExtendedValue
+IntrinsicLibrary::genGetCwd(std::optional<mlir::Type> resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert((args.size() == 1 && resultType.has_value()) ||
+ (args.size() >= 1 && !resultType.has_value()));
+
+ mlir::Value cwd = fir::getBase(args[0]);
+ mlir::Value statusValue = fir::runtime::genGetCwd(builder, loc, cwd);
+
+ if (resultType.has_value()) {
+ // Function form, return status.
+ return statusValue;
+ } else {
+ // Subroutine form, store status and return none.
+ const fir::ExtendedValue &status = args[1];
+ if (!isStaticallyAbsent(status)) {
+ mlir::Value statusAddr = fir::getBase(status);
+ mlir::Value statusIsPresentAtRuntime =
+ builder.genIsNotNullAddr(loc, statusAddr);
+ builder.genIfThen(loc, statusIsPresentAtRuntime)
+ .genThen([&]() {
+ builder.createStoreWithConvert(loc, statusValue, statusAddr);
+ })
+ .end();
+ }
+ }
+
+ return {};
+}
+
// GET_COMMAND
void IntrinsicLibrary::genGetCommand(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 4);
@@ -4965,10 +5000,6 @@ fir::ExtendedValue
IntrinsicLibrary::genIsContiguous(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 1);
- if (const auto *boxValue = args[0].getBoxOf<fir::BoxValue>())
- if (boxValue->hasAssumedRank())
- TODO(loc, "intrinsic: is_contiguous with assumed rank argument");
-
return builder.createConvert(
loc, resultType,
fir::runtime::genIsContiguous(builder, loc, fir::getBase(args[0])));
diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp
index 76b920d..16e543f 100644
--- a/flang/lib/Optimizer/Builder/MutableBox.cpp
+++ b/flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -394,6 +394,8 @@ static bool readToBoxValue(const fir::MutableBoxValue &box,
// Track value as fir.box
if ((box.isDerived() && mayBePolymorphic) || box.isUnlimitedPolymorphic())
return true;
+ if (box.hasAssumedRank())
+ return true;
// Intrinsic allocatables are contiguous, no need to track the value by
// fir.box.
if (box.isAllocatable() || box.rank() == 0)
@@ -409,14 +411,12 @@ fir::factory::genMutableBoxRead(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
bool mayBePolymorphic,
bool preserveLowerBounds) {
- if (box.hasAssumedRank())
- TODO(loc, "assumed rank allocatables or pointers");
llvm::SmallVector<mlir::Value> lbounds;
llvm::SmallVector<mlir::Value> extents;
llvm::SmallVector<mlir::Value> lengths;
if (readToBoxValue(box, mayBePolymorphic)) {
auto reader = MutablePropertyReader(builder, loc, box);
- if (preserveLowerBounds)
+ if (preserveLowerBounds && !box.hasAssumedRank())
reader.getLowerBounds(lbounds);
return fir::BoxValue{reader.getIrBox(), lbounds,
box.nonDeferredLenParams()};
diff --git a/flang/lib/Optimizer/Builder/Runtime/Command.cpp b/flang/lib/Optimizer/Builder/Runtime/Command.cpp
index 1d719e7..8320d89 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Command.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Command.cpp
@@ -88,3 +88,16 @@ mlir::Value fir::runtime::genGetEnvVariable(fir::FirOpBuilder &builder,
sourceFile, sourceLine);
return builder.create<fir::CallOp>(loc, runtimeFunc, args).getResult(0);
}
+
+mlir::Value fir::runtime::genGetCwd(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value cwd) {
+ mlir::func::FuncOp func =
+ fir::runtime::getRuntimeFunc<mkRTKey(GetCwd)>(loc, builder);
+ auto runtimeFuncTy = func.getFunctionType();
+ mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+ mlir::Value sourceLine =
+ fir::factory::locationToLineNo(builder, loc, runtimeFuncTy.getInput(2));
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, runtimeFuncTy, cwd, sourceFile, sourceLine);
+ return builder.create<fir::CallOp>(loc, func, args).getResult(0);
+}
diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp
index 2faba63..a0202a0 100644
--- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp
@@ -297,6 +297,6 @@ void fir::printFirAttribute(FIROpsDialect *dialect, mlir::Attribute attr,
void FIROpsDialect::registerAttributes() {
addAttributes<ClosedIntervalAttr, ExactTypeAttr, FortranVariableFlagsAttr,
- LowerBoundAttr, PointIntervalAttr, RealAttr, SubclassAttr,
- UpperBoundAttr>();
+ LowerBoundAttr, PointIntervalAttr, RealAttr, ReduceAttr,
+ SubclassAttr, UpperBoundAttr>();
}
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index b530a9d..75ca738 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -2456,9 +2456,16 @@ void fir::DoLoopOp::build(mlir::OpBuilder &builder,
mlir::OperationState &result, mlir::Value lb,
mlir::Value ub, mlir::Value step, bool unordered,
bool finalCountValue, mlir::ValueRange iterArgs,
+ mlir::ValueRange reduceOperands,
+ llvm::ArrayRef<mlir::Attribute> reduceAttrs,
llvm::ArrayRef<mlir::NamedAttribute> attributes) {
result.addOperands({lb, ub, step});
+ result.addOperands(reduceOperands);
result.addOperands(iterArgs);
+ result.addAttribute(getOperandSegmentSizeAttr(),
+ builder.getDenseI32ArrayAttr(
+ {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
+ static_cast<int32_t>(iterArgs.size())}));
if (finalCountValue) {
result.addTypes(builder.getIndexType());
result.addAttribute(getFinalValueAttrName(result.name),
@@ -2477,6 +2484,9 @@ void fir::DoLoopOp::build(mlir::OpBuilder &builder,
if (unordered)
result.addAttribute(getUnorderedAttrName(result.name),
builder.getUnitAttr());
+ if (!reduceAttrs.empty())
+ result.addAttribute(getReduceAttrsAttrName(result.name),
+ builder.getArrayAttr(reduceAttrs));
result.addAttributes(attributes);
}
@@ -2502,24 +2512,51 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
if (mlir::succeeded(parser.parseOptionalKeyword("unordered")))
result.addAttribute("unordered", builder.getUnitAttr());
+ // Parse the reduction arguments.
+ llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> reduceOperands;
+ llvm::SmallVector<mlir::Type> reduceArgTypes;
+ if (succeeded(parser.parseOptionalKeyword("reduce"))) {
+ // Parse reduction attributes and variables.
+ llvm::SmallVector<ReduceAttr> attributes;
+ if (failed(parser.parseCommaSeparatedList(
+ mlir::AsmParser::Delimiter::Paren, [&]() {
+ if (parser.parseAttribute(attributes.emplace_back()) ||
+ parser.parseArrow() ||
+ parser.parseOperand(reduceOperands.emplace_back()) ||
+ parser.parseColonType(reduceArgTypes.emplace_back()))
+ return mlir::failure();
+ return mlir::success();
+ })))
+ return mlir::failure();
+ // Resolve input operands.
+ for (auto operand_type : llvm::zip(reduceOperands, reduceArgTypes))
+ if (parser.resolveOperand(std::get<0>(operand_type),
+ std::get<1>(operand_type), result.operands))
+ return mlir::failure();
+ llvm::SmallVector<mlir::Attribute> arrayAttr(attributes.begin(),
+ attributes.end());
+ result.addAttribute(getReduceAttrsAttrName(result.name),
+ builder.getArrayAttr(arrayAttr));
+ }
+
// Parse the optional initial iteration arguments.
llvm::SmallVector<mlir::OpAsmParser::Argument> regionArgs;
- llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> operands;
+ llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> iterOperands;
llvm::SmallVector<mlir::Type> argTypes;
bool prependCount = false;
regionArgs.push_back(inductionVariable);
if (succeeded(parser.parseOptionalKeyword("iter_args"))) {
// Parse assignment list and results type list.
- if (parser.parseAssignmentList(regionArgs, operands) ||
+ if (parser.parseAssignmentList(regionArgs, iterOperands) ||
parser.parseArrowTypeList(result.types))
return mlir::failure();
- if (result.types.size() == operands.size() + 1)
+ if (result.types.size() == iterOperands.size() + 1)
prependCount = true;
// Resolve input operands.
llvm::ArrayRef<mlir::Type> resTypes = result.types;
- for (auto operand_type :
- llvm::zip(operands, prependCount ? resTypes.drop_front() : resTypes))
+ for (auto operand_type : llvm::zip(
+ iterOperands, prependCount ? resTypes.drop_front() : resTypes))
if (parser.resolveOperand(std::get<0>(operand_type),
std::get<1>(operand_type), result.operands))
return mlir::failure();
@@ -2530,6 +2567,12 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
prependCount = true;
}
+  // Set the operandSegmentSizes attribute.
+ result.addAttribute(getOperandSegmentSizeAttr(),
+ builder.getDenseI32ArrayAttr(
+ {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
+ static_cast<int32_t>(iterOperands.size())}));
+
if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
return mlir::failure();
@@ -2606,6 +2649,10 @@ mlir::LogicalResult fir::DoLoopOp::verify() {
i++;
}
+ auto reduceAttrs = getReduceAttrsAttr();
+ if (getNumReduceOperands() != (reduceAttrs ? reduceAttrs.size() : 0))
+ return emitOpError(
+ "mismatch in number of reduction variables and reduction attributes");
return mlir::success();
}
@@ -2615,6 +2662,17 @@ void fir::DoLoopOp::print(mlir::OpAsmPrinter &p) {
<< getUpperBound() << " step " << getStep();
if (getUnordered())
p << " unordered";
+ if (hasReduceOperands()) {
+ p << " reduce(";
+ auto attrs = getReduceAttrsAttr();
+ auto operands = getReduceOperands();
+ llvm::interleaveComma(llvm::zip(attrs, operands), p, [&](auto it) {
+ p << std::get<0>(it) << " -> " << std::get<1>(it) << " : "
+ << std::get<1>(it).getType();
+ });
+ p << ')';
+ printBlockTerminators = true;
+ }
if (hasIterOperands()) {
p << " iter_args(";
auto regionArgs = getRegionIterArgs();
@@ -2628,8 +2686,9 @@ void fir::DoLoopOp::print(mlir::OpAsmPrinter &p) {
p << " -> " << getResultTypes();
printBlockTerminators = true;
}
- p.printOptionalAttrDictWithKeyword((*this)->getAttrs(),
- {"unordered", "finalValue"});
+ p.printOptionalAttrDictWithKeyword(
+ (*this)->getAttrs(),
+ {"unordered", "finalValue", "reduceAttrs", "operandSegmentSizes"});
p << ' ';
p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
printBlockTerminators);
diff --git a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
index b265c74..648628fd 100644
--- a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
@@ -45,17 +45,11 @@ namespace {
class ExternalNameConversionPass
: public fir::impl::ExternalNameConversionBase<ExternalNameConversionPass> {
public:
- ExternalNameConversionPass(bool appendUnderscoring)
- : appendUnderscores(appendUnderscoring) {}
-
- ExternalNameConversionPass() { usePassOpt = true; }
+ using ExternalNameConversionBase<
+ ExternalNameConversionPass>::ExternalNameConversionBase;
mlir::ModuleOp getModule() { return getOperation(); }
void runOnOperation() override;
-
-private:
- bool appendUnderscores;
- bool usePassOpt = false;
};
} // namespace
@@ -63,7 +57,6 @@ void ExternalNameConversionPass::runOnOperation() {
auto op = getOperation();
auto *context = &getContext();
- appendUnderscores = (usePassOpt) ? appendUnderscoreOpt : appendUnderscores;
llvm::DenseMap<mlir::StringAttr, mlir::FlatSymbolRefAttr> remappings;
// Update names of external Fortran functions and names of Common Block
// globals.
@@ -74,7 +67,8 @@ void ExternalNameConversionPass::runOnOperation() {
mlir::SymbolTable::getSymbolAttrName());
auto deconstructedName = fir::NameUniquer::deconstruct(symName);
if (fir::NameUniquer::isExternalFacingUniquedName(deconstructedName)) {
- auto newName = mangleExternalName(deconstructedName, appendUnderscores);
+ auto newName =
+ mangleExternalName(deconstructedName, appendUnderscoreOpt);
auto newAttr = mlir::StringAttr::get(context, newName);
mlir::SymbolTable::setSymbolName(&funcOrGlobal, newAttr);
auto newSymRef = mlir::FlatSymbolRefAttr::get(newAttr);
@@ -101,12 +95,3 @@ void ExternalNameConversionPass::runOnOperation() {
nestedOp->setAttr(update.first, update.second);
});
}
-
-std::unique_ptr<mlir::Pass> fir::createExternalNameConversionPass() {
- return std::make_unique<ExternalNameConversionPass>();
-}
-
-std::unique_ptr<mlir::Pass>
-fir::createExternalNameConversionPass(bool appendUnderscoring) {
- return std::make_unique<ExternalNameConversionPass>(appendUnderscoring);
-}
diff --git a/flang/runtime/command.cpp b/flang/runtime/command.cpp
index b573c5d..e642248a 100644
--- a/flang/runtime/command.cpp
+++ b/flang/runtime/command.cpp
@@ -17,12 +17,19 @@
#ifdef _WIN32
#include "flang/Common/windows-include.h"
+#include <direct.h>
+#define getcwd _getcwd
+#define PATH_MAX MAX_PATH
// On Windows GetCurrentProcessId returns a DWORD aka uint32_t
#include <processthreadsapi.h>
inline pid_t getpid() { return GetCurrentProcessId(); }
#else
#include <unistd.h> //getpid()
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
#endif
namespace Fortran::runtime {
@@ -239,4 +246,23 @@ std::int32_t RTNAME(GetEnvVariable)(const Descriptor &name,
return StatOk;
}
+// Implements GET_CWD: copies the current working directory into the
+// character descriptor 'cwd' and returns a Stat code.
+std::int32_t RTNAME(GetCwd)(
+    const Descriptor &cwd, const char *sourceFile, int line) {
+  Terminator terminator{sourceFile, line};
+  RUNTIME_CHECK(terminator, IsValidCharDescriptor(&cwd));
+  char *buf{(char *)AllocateMemoryOrCrash(terminator, PATH_MAX)};
+  std::int32_t status{StatOk};
+  if (!getcwd(buf, PATH_MAX)) {
+    status = StatMissingCurrentWorkDirectory;
+  } else {
+    std::int64_t strLen{StringLength(buf)};
+    status = CopyCharsToDescriptor(cwd, buf, strLen);
+  }
+  // Free the scratch buffer on every path (the early return leaked it).
+  std::free(buf);
+  return status;
+}
+
} // namespace Fortran::runtime
diff --git a/flang/runtime/stat.h b/flang/runtime/stat.h
index 4f46f52..71faeb0 100644
--- a/flang/runtime/stat.h
+++ b/flang/runtime/stat.h
@@ -41,6 +41,7 @@ enum Stat {
StatLocked = FORTRAN_RUNTIME_STAT_LOCKED,
StatLockedOtherImage = FORTRAN_RUNTIME_STAT_LOCKED_OTHER_IMAGE,
StatMissingEnvVariable = FORTRAN_RUNTIME_STAT_MISSING_ENV_VAR,
+ StatMissingCurrentWorkDirectory = FORTRAN_RUNTIME_STAT_MISSING_CWD,
StatStoppedImage = FORTRAN_RUNTIME_STAT_STOPPED_IMAGE,
StatUnlocked = FORTRAN_RUNTIME_STAT_UNLOCKED,
StatUnlockedFailedImage = FORTRAN_RUNTIME_STAT_UNLOCKED_FAILED_IMAGE,
diff --git a/flang/test/Fir/loop03.fir b/flang/test/Fir/loop03.fir
new file mode 100644
index 0000000..b88dcaf
--- /dev/null
+++ b/flang/test/Fir/loop03.fir
@@ -0,0 +1,17 @@
+// Test the reduction semantics of fir.do_loop
+// RUN: fir-opt %s | FileCheck %s
+
+func.func @reduction() {
+ %bound = arith.constant 10 : index
+ %step = arith.constant 1 : index
+ %sum = fir.alloca i32
+// CHECK: %[[VAL_0:.*]] = fir.alloca i32
+// CHECK: fir.do_loop %[[VAL_1:.*]] = %[[VAL_2:.*]] to %[[VAL_3:.*]] step %[[VAL_4:.*]] unordered reduce(#fir.reduce_attr<add> -> %[[VAL_0]] : !fir.ref<i32>) {
+ fir.do_loop %iv = %step to %bound step %step unordered reduce(#fir.reduce_attr<add> -> %sum : !fir.ref<i32>) {
+ %index = fir.convert %iv : (index) -> i32
+ %1 = fir.load %sum : !fir.ref<i32>
+ %2 = arith.addi %index, %1 : i32
+ fir.store %2 to %sum : !fir.ref<i32>
+ }
+ return
+}
diff --git a/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 b/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90
new file mode 100644
index 0000000..e8610aa
--- /dev/null
+++ b/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90
@@ -0,0 +1,383 @@
+! Test lowering of inquiry intrinsics with assumed-rank arguments.
+! RUN: bbc -emit-hlfir -o - %s -allow-assumed-rank | FileCheck %s
+
+subroutine test_allocated(x)
+ real, allocatable :: x(..)
+ call takes_logical(allocated(x))
+end subroutine
+
+subroutine test_associated_1(x)
+ real, pointer :: x(..)
+ call takes_logical(associated(x))
+end subroutine
+
+subroutine test_associated_2(x, y)
+ real, pointer :: x(..)
+ real, target :: y(:)
+ call takes_logical(associated(x, y))
+end subroutine
+
+subroutine test_associated_3(x, y)
+ real, pointer :: x(..)
+ real, pointer :: y(..)
+ call takes_logical(associated(x, y))
+end subroutine
+
+subroutine test_len_1(x)
+ character(*) :: x(..)
+ call takes_integer(len(x))
+end subroutine
+
+subroutine test_len_2(x)
+ character(*), pointer :: x(..)
+ call takes_integer(len(x))
+end subroutine
+
+subroutine test_storage_size_1(x)
+ class(*) :: x(..)
+ call takes_integer(storage_size(x))
+end subroutine
+
+subroutine test_storage_size_2(x)
+ class(*), pointer :: x(..)
+ call takes_integer(storage_size(x))
+end subroutine
+
+subroutine test_present_1(x)
+ class(*), optional :: x(..)
+ call takes_logical(present(x))
+end subroutine
+
+subroutine test_present_2(x)
+ class(*), optional, pointer :: x(..)
+ call takes_logical(present(x))
+end subroutine
+
+subroutine test_is_contiguous_1(x)
+ class(*) :: x(..)
+ call takes_logical(is_contiguous(x))
+end subroutine
+
+subroutine test_is_contiguous_2(x)
+ class(*), pointer :: x(..)
+ call takes_logical(is_contiguous(x))
+end subroutine
+
+subroutine test_same_type_as_1(x, y)
+ class(*) :: x(..), y(..)
+ call takes_logical(same_type_as(x, y))
+end subroutine
+
+subroutine test_same_type_as_2(x, y)
+ class(*), pointer :: x(..), y(..)
+ call takes_logical(same_type_as(x, y))
+end subroutine
+
+subroutine test_extends_type_of_1(x, y)
+ class(*) :: x(..), y(..)
+ call takes_logical(extends_type_of(x, y))
+end subroutine
+
+subroutine test_extends_type_of_2(x, y)
+ class(*), pointer :: x(..), y(..)
+ call takes_logical(extends_type_of(x, y))
+end subroutine
+
+subroutine c_loc_1(x)
+ use iso_c_binding, only : c_loc
+ real, target :: x(..)
+ call takes_cloc(c_loc(x))
+end subroutine
+
+subroutine c_loc_2(x)
+ use iso_c_binding, only : c_loc
+ real, pointer :: x(..)
+ call takes_cloc(c_loc(x))
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_allocated(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocatedEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>)
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>
+! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<*:f32>>>) -> !fir.heap<!fir.array<*:f32>>
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.heap<!fir.array<*:f32>>) -> i64
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_7:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_6]] : i64
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_9]]#1, %[[VAL_9]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_associated_1(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_associated_1Ex"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>)
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>
+! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_3]] : (!fir.box<!fir.ptr<!fir.array<*:f32>>>) -> !fir.ptr<!fir.array<*:f32>>
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ptr<!fir.array<*:f32>>) -> i64
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_7:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_6]] : i64
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_9]]#1, %[[VAL_9]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_associated_2(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "y", fir.target}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_associated_2Ex"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFtest_associated_2Ey"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>
+! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (!fir.box<!fir.ptr<!fir.array<*:f32>>>) -> !fir.box<none>
+! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+! CHECK: %[[VAL_8:.*]] = fir.call @_FortranAPointerIsAssociatedWith(%[[VAL_6]], %[[VAL_7]]) fastmath<contract> : (!fir.box<none>, !fir.box<none>) -> i1
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_10:.*]]:3 = hlfir.associate %[[VAL_9]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_10]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_10]]#1, %[[VAL_10]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_associated_3(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_associated_3Ex"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_associated_3Ey"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>)
+! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_4]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>
+! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>
+! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.box<!fir.ptr<!fir.array<*:f32>>>) -> !fir.box<none>
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_5]] : (!fir.box<!fir.ptr<!fir.array<*:f32>>>) -> !fir.box<none>
+! CHECK: %[[VAL_9:.*]] = fir.call @_FortranAPointerIsAssociatedWith(%[[VAL_7]], %[[VAL_8]]) fastmath<contract> : (!fir.box<none>, !fir.box<none>) -> i1
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_11:.*]]:3 = hlfir.associate %[[VAL_10]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_11]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_11]]#1, %[[VAL_11]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_len_1(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<*:!fir.char<1,?>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFtest_len_1Ex"} : (!fir.box<!fir.array<*:!fir.char<1,?>>>, !fir.dscope) -> (!fir.box<!fir.array<*:!fir.char<1,?>>>, !fir.box<!fir.array<*:!fir.char<1,?>>>)
+! CHECK: %[[VAL_3:.*]] = fir.box_elesize %[[VAL_2]]#1 : (!fir.box<!fir.array<*:!fir.char<1,?>>>) -> index
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (index) -> i32
+! CHECK: %[[VAL_5:.*]]:3 = hlfir.associate %[[VAL_4]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+! CHECK: fir.call @_QPtakes_integer(%[[VAL_5]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_5]]#1, %[[VAL_5]]#2 : !fir.ref<i32>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_len_2(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<*:!fir.char<1,?>>>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<*:!fir.char<1,?>>>>>
+! CHECK: %[[VAL_3:.*]] = fir.box_elesize %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<*:!fir.char<1,?>>>>) -> index
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] typeparams %[[VAL_3]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_len_2Ex"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:!fir.char<1,?>>>>>, index, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:!fir.char<1,?>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<*:!fir.char<1,?>>>>>)
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]] : (index) -> i32
+! CHECK: %[[VAL_6:.*]]:3 = hlfir.associate %[[VAL_5]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+! CHECK: fir.call @_QPtakes_integer(%[[VAL_6]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_6]]#1, %[[VAL_6]]#2 : !fir.ref<i32>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_storage_size_1(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.class<!fir.array<*:none>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFtest_storage_size_1Ex"} : (!fir.class<!fir.array<*:none>>, !fir.dscope) -> (!fir.class<!fir.array<*:none>>, !fir.class<!fir.array<*:none>>)
+! CHECK: %[[VAL_3:.*]] = fir.box_elesize %[[VAL_2]]#1 : (!fir.class<!fir.array<*:none>>) -> i32
+! CHECK: %[[VAL_4:.*]] = arith.constant 8 : i32
+! CHECK: %[[VAL_5:.*]] = arith.muli %[[VAL_3]], %[[VAL_4]] : i32
+! CHECK: %[[VAL_6:.*]]:3 = hlfir.associate %[[VAL_5]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+! CHECK: fir.call @_QPtakes_integer(%[[VAL_6]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_6]]#1, %[[VAL_6]]#2 : !fir.ref<i32>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_storage_size_2(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_storage_size_2Ex"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>)
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#1 : !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>
+! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_3]] : (!fir.class<!fir.ptr<!fir.array<*:none>>>) -> !fir.ptr<!fir.array<*:none>>
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ptr<!fir.array<*:none>>) -> i64
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64
+! CHECK: fir.if %[[VAL_7]] {
+! CHECK: %[[VAL_13:.*]] = fir.call @_FortranAReportFatalUserError
+! CHECK: }
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_2]]#1 : !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>
+! CHECK: %[[VAL_15:.*]] = fir.box_elesize %[[VAL_14]] : (!fir.class<!fir.ptr<!fir.array<*:none>>>) -> i32
+! CHECK: %[[VAL_16:.*]] = arith.constant 8 : i32
+! CHECK: %[[VAL_17:.*]] = arith.muli %[[VAL_15]], %[[VAL_16]] : i32
+! CHECK: %[[VAL_18:.*]]:3 = hlfir.associate %[[VAL_17]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+! CHECK: fir.call @_QPtakes_integer(%[[VAL_18]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_18]]#1, %[[VAL_18]]#2 : !fir.ref<i32>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_present_1(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.class<!fir.array<*:none>> {fir.bindc_name = "x", fir.optional}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtest_present_1Ex"} : (!fir.class<!fir.array<*:none>>, !fir.dscope) -> (!fir.class<!fir.array<*:none>>, !fir.class<!fir.array<*:none>>)
+! CHECK: %[[VAL_3:.*]] = fir.is_present %[[VAL_2]]#1 : (!fir.class<!fir.array<*:none>>) -> i1
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_5:.*]]:3 = hlfir.associate %[[VAL_4]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_5]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_5]]#1, %[[VAL_5]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_present_2(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>> {fir.bindc_name = "x", fir.optional}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<optional, pointer>, uniq_name = "_QFtest_present_2Ex"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>)
+! CHECK: %[[VAL_3:.*]] = fir.is_present %[[VAL_2]]#1 : (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>) -> i1
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_5:.*]]:3 = hlfir.associate %[[VAL_4]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_5]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_5]]#1, %[[VAL_5]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_is_contiguous_1(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.class<!fir.array<*:none>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFtest_is_contiguous_1Ex"} : (!fir.class<!fir.array<*:none>>, !fir.dscope) -> (!fir.class<!fir.array<*:none>>, !fir.class<!fir.array<*:none>>)
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]]#1 : (!fir.class<!fir.array<*:none>>) -> !fir.box<none>
+! CHECK: %[[VAL_4:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_3]]) fastmath<contract> : (!fir.box<none>) -> i1
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_6:.*]]:3 = hlfir.associate %[[VAL_5]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_6]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_6]]#1, %[[VAL_6]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_is_contiguous_2(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_is_contiguous_2Ex"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>)
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#1 : !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.class<!fir.ptr<!fir.array<*:none>>>) -> !fir.box<none>
+! CHECK: %[[VAL_5:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_4]]) fastmath<contract> : (!fir.box<none>) -> i1
+! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_7:.*]]:3 = hlfir.associate %[[VAL_6]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_7]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_7]]#1, %[[VAL_7]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_same_type_as_1(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.class<!fir.array<*:none>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.class<!fir.array<*:none>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFtest_same_type_as_1Ex"} : (!fir.class<!fir.array<*:none>>, !fir.dscope) -> (!fir.class<!fir.array<*:none>>, !fir.class<!fir.array<*:none>>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {uniq_name = "_QFtest_same_type_as_1Ey"} : (!fir.class<!fir.array<*:none>>, !fir.dscope) -> (!fir.class<!fir.array<*:none>>, !fir.class<!fir.array<*:none>>)
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.class<!fir.array<*:none>>) -> !fir.box<none>
+! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.class<!fir.array<*:none>>) -> !fir.box<none>
+! CHECK: %[[VAL_7:.*]] = fir.call @_FortranASameTypeAs(%[[VAL_5]], %[[VAL_6]]) fastmath<contract> : (!fir.box<none>, !fir.box<none>) -> i1
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_9]]#1, %[[VAL_9]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_same_type_as_2(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_same_type_as_2Ex"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_same_type_as_2Ey"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>)
+! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>
+! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#1 : !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>
+! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_5]] : (!fir.class<!fir.ptr<!fir.array<*:none>>>) -> !fir.box<none>
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_6]] : (!fir.class<!fir.ptr<!fir.array<*:none>>>) -> !fir.box<none>
+! CHECK: %[[VAL_9:.*]] = fir.call @_FortranASameTypeAs(%[[VAL_7]], %[[VAL_8]]) fastmath<contract> : (!fir.box<none>, !fir.box<none>) -> i1
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_11:.*]]:3 = hlfir.associate %[[VAL_10]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_11]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_11]]#1, %[[VAL_11]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_extends_type_of_1(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.class<!fir.array<*:none>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.class<!fir.array<*:none>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFtest_extends_type_of_1Ex"} : (!fir.class<!fir.array<*:none>>, !fir.dscope) -> (!fir.class<!fir.array<*:none>>, !fir.class<!fir.array<*:none>>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {uniq_name = "_QFtest_extends_type_of_1Ey"} : (!fir.class<!fir.array<*:none>>, !fir.dscope) -> (!fir.class<!fir.array<*:none>>, !fir.class<!fir.array<*:none>>)
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.class<!fir.array<*:none>>) -> !fir.box<none>
+! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.class<!fir.array<*:none>>) -> !fir.box<none>
+! CHECK: %[[VAL_7:.*]] = fir.call @_FortranAExtendsTypeOf(%[[VAL_5]], %[[VAL_6]]) fastmath<contract> : (!fir.box<none>, !fir.box<none>) -> i1
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_9]]#1, %[[VAL_9]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtest_extends_type_of_2(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_extends_type_of_2Ex"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_extends_type_of_2Ey"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>, !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>)
+! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>
+! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#1 : !fir.ref<!fir.class<!fir.ptr<!fir.array<*:none>>>>
+! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_5]] : (!fir.class<!fir.ptr<!fir.array<*:none>>>) -> !fir.box<none>
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_6]] : (!fir.class<!fir.ptr<!fir.array<*:none>>>) -> !fir.box<none>
+! CHECK: %[[VAL_9:.*]] = fir.call @_FortranAExtendsTypeOf(%[[VAL_7]], %[[VAL_8]]) fastmath<contract> : (!fir.box<none>, !fir.box<none>) -> i1
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
+! CHECK: %[[VAL_11:.*]]:3 = hlfir.associate %[[VAL_10]] {adapt.valuebyref} : (!fir.logical<4>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>, i1)
+! CHECK: fir.call @_QPtakes_logical(%[[VAL_11]]#1) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_11]]#1, %[[VAL_11]]#2 : !fir.ref<!fir.logical<4>>, i1
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPc_loc_1(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<*:f32>> {fir.bindc_name = "x", fir.target}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFc_loc_1Ex"} : (!fir.box<!fir.array<*:f32>>, !fir.dscope) -> (!fir.box<!fir.array<*:f32>>, !fir.box<!fir.array<*:f32>>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_2]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.ref<!fir.array<*:f32>>
+! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.array<*:f32>>) -> i64
+! CHECK: fir.store %[[VAL_7]] to %[[VAL_5]] : !fir.ref<i64>
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+! CHECK: %[[VAL_9:.*]] = arith.constant false
+! CHECK: %[[VAL_10:.*]] = hlfir.as_expr %[[VAL_8]]#0 move %[[VAL_9]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, i1) -> !hlfir.expr<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+! CHECK: %[[VAL_11:.*]]:3 = hlfir.associate %[[VAL_10]] {adapt.valuebyref} : (!hlfir.expr<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, i1)
+! CHECK: fir.call @_QPtakes_cloc(%[[VAL_11]]#1) fastmath<contract> : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_11]]#1, %[[VAL_11]]#2 : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, i1
+! CHECK: hlfir.destroy %[[VAL_10]] : !hlfir.expr<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPc_loc_2(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFc_loc_2Ex"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>)
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<*:f32>>>>
+! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[VAL_5:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[VAL_6:.*]] = fir.coordinate_of %[[VAL_4]], %[[VAL_5]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_3]] : (!fir.box<!fir.ptr<!fir.array<*:f32>>>) -> !fir.ptr<!fir.array<*:f32>>
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.ptr<!fir.array<*:f32>>) -> i64
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_6]] : !fir.ref<i64>
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>)
+! CHECK: %[[VAL_10:.*]] = arith.constant false
+! CHECK: %[[VAL_11:.*]] = hlfir.as_expr %[[VAL_9]]#0 move %[[VAL_10]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, i1) -> !hlfir.expr<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+! CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_11]] {adapt.valuebyref} : (!hlfir.expr<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, i1)
+! CHECK: fir.call @_QPtakes_cloc(%[[VAL_12]]#1) fastmath<contract> : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_12]]#1, %[[VAL_12]]#2 : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, i1
+! CHECK: hlfir.destroy %[[VAL_11]] : !hlfir.expr<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+! CHECK: return
+! CHECK: }
diff --git a/flang/test/Lower/Intrinsics/getcwd-function.f90 b/flang/test/Lower/Intrinsics/getcwd-function.f90
new file mode 100644
index 0000000..50b6472
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/getcwd-function.f90
@@ -0,0 +1,23 @@
+! Test GETCWD called as a function (status returned as the function result).
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPtest(
+! CHECK-SAME: %[[cwdArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "cwd"}) -> i32 {
+integer function test(cwd)
+ CHARACTER(len=255) :: cwd
+ test = getcwd(cwd)
+ ! CHECK-NEXT: %[[c8:.*]] = arith.constant 8 : i32
+ ! CHECK-NEXT: %[[c255:.*]] = arith.constant 255 : index
+ ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
+ ! CHECK-NEXT: %[[cwdUnbox:.*]]:2 = fir.unboxchar %[[cwdArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ ! CHECK-NEXT: %[[cwdCast:.*]] = fir.convert %[[cwdUnbox]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdDeclare:.*]] = fir.declare %[[cwdCast]] typeparams %[[c255]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFtestEcwd"} : (!fir.ref<!fir.char<1,255>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[test:.*]] = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFtestEtest"}
+ ! CHECK-NEXT: %[[testAddr:.*]] = fir.declare %[[test]] {uniq_name = "_QFtestEtest"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ ! CHECK-NEXT: %[[cwdBox:.*]] = fir.embox %[[cwdDeclare]] : (!fir.ref<!fir.char<1,255>>) -> !fir.box<!fir.char<1,255>>
+ ! CHECK: %[[cwd:.*]] = fir.convert %[[cwdBox]] : (!fir.box<!fir.char<1,255>>) -> !fir.box<none>
+ ! CHECK: %[[statusValue:.*]] = fir.call @_FortranAGetCwd(%[[cwd]], %[[VAL_9:.*]], %[[c8]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK-NEXT: fir.store %[[statusValue]] to %[[testAddr]] : !fir.ref<i32>
+ ! CHECK-NEXT: %[[returnValue:.*]] = fir.load %[[testAddr]] : !fir.ref<i32>
+ ! CHECK-NEXT: return %[[returnValue]] : i32
+end function
diff --git a/flang/test/Lower/Intrinsics/getcwd-optional.f90 b/flang/test/Lower/Intrinsics/getcwd-optional.f90
new file mode 100644
index 0000000..3e2a221
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/getcwd-optional.f90
@@ -0,0 +1,29 @@
+! Test GETCWD with dynamically optional arguments.
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+
+! CHECK-LABEL: func.func @_QPtest(
+! CHECK-SAME: %[[cwdArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "cwd"},
+! CHECK-SAME: %[[statusArg:.*]]: !fir.ref<i32> {fir.bindc_name = "status", fir.optional}) {
+subroutine test(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER, OPTIONAL :: status
+ call getcwd(cwd, status)
+ ! CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i64
+ ! CHECK-NEXT: %[[c11:.*]] = arith.constant 11 : i32
+ ! CHECK-NEXT: %[[c255:.*]] = arith.constant 255 : index
+ ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
+ ! CHECK-NEXT: %[[cwdUnbox:.*]]:2 = fir.unboxchar %[[cwdArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ ! CHECK-NEXT: %[[cwdCast:.*]] = fir.convert %[[cwdUnbox]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdDeclare:.*]] = fir.declare %[[cwdCast]] typeparams %[[c255]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFtestEcwd"} : (!fir.ref<!fir.char<1,255>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[statusAddr:.*]] = fir.declare %[[statusArg]] dummy_scope %[[DSCOPE]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtestEstatus"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+ ! CHECK-NEXT: %[[cwdBox:.*]] = fir.embox %[[cwdDeclare]] : (!fir.ref<!fir.char<1,255>>) -> !fir.box<!fir.char<1,255>>
+ ! CHECK: %[[cwd:.*]] = fir.convert %[[cwdBox]] : (!fir.box<!fir.char<1,255>>) -> !fir.box<none>
+ ! CHECK: %[[statusValue:.*]] = fir.call @_FortranAGetCwd(%[[cwd]], %[[VAL_8:.*]], %[[c11]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK-NEXT: %[[statusCast:.*]] = fir.convert %[[statusAddr]] : (!fir.ref<i32>) -> i64
+ ! CHECK-NEXT: %[[isPresent:.*]] = arith.cmpi ne, %[[statusCast]], %[[c0]] : i64
+ ! CHECK-NEXT: fir.if %[[isPresent]] {
+ ! CHECK-NEXT: fir.store %[[statusValue]] to %[[statusAddr]] : !fir.ref<i32>
+ ! CHECK-NEXT: }
+ ! CHECK-NEXT: return
+end subroutine
diff --git a/flang/test/Lower/Intrinsics/getcwd.f90 b/flang/test/Lower/Intrinsics/getcwd.f90
new file mode 100644
index 0000000..fe20785
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/getcwd.f90
@@ -0,0 +1,44 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPcwd_only(
+! CHECK-SAME: %[[cwdArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "cwd"}) {
+subroutine cwd_only(cwd)
+ CHARACTER(len=255) :: cwd
+ call getcwd(cwd)
+ ! CHECK-NEXT: %[[c7:.*]] = arith.constant 7 : i32
+ ! CHECK-NEXT: %[[c255:.*]] = arith.constant 255 : index
+ ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
+ ! CHECK-NEXT: %[[cwdUnbox:.*]]:2 = fir.unboxchar %[[cwdArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ ! CHECK-NEXT: %[[cwdCast:.*]] = fir.convert %[[cwdUnbox]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdDeclare:.*]] = fir.declare %[[cwdCast]] typeparams %[[c255]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFcwd_onlyEcwd"} : (!fir.ref<!fir.char<1,255>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdBox:.*]] = fir.embox %[[cwdDeclare]] : (!fir.ref<!fir.char<1,255>>) -> !fir.box<!fir.char<1,255>>
+ ! CHECK: %[[cwd:.*]] = fir.convert %[[cwdBox]] : (!fir.box<!fir.char<1,255>>) -> !fir.box<none>
+ ! CHECK: %[[statusValue:.*]] = fir.call @_FortranAGetCwd(%[[cwd]], %[[VAL_7:.*]], %[[c7]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK-NEXT: return
+end subroutine cwd_only
+
+! CHECK-LABEL: func.func @_QPall_arguments(
+! CHECK-SAME: %[[cwdArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "cwd"},
+! CHECK-SAME: %[[statusArg:.*]]: !fir.ref<i32> {fir.bindc_name = "status"}) {
+subroutine all_arguments(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ call getcwd(cwd, status)
+ ! CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i64
+ ! CHECK-NEXT: %[[c26:.*]] = arith.constant 26 : i32
+ ! CHECK-NEXT: %[[c255:.*]] = arith.constant 255 : index
+ ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
+ ! CHECK-NEXT: %[[cwdUnbox:.*]]:2 = fir.unboxchar %[[cwdArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ ! CHECK-NEXT: %[[cwdCast:.*]] = fir.convert %[[cwdUnbox]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdDeclare:.*]] = fir.declare %[[cwdCast]] typeparams %[[c255]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFall_argumentsEcwd"} : (!fir.ref<!fir.char<1,255>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[statusAddr:.*]] = fir.declare %[[statusArg]] dummy_scope %0 {uniq_name = "_QFall_argumentsEstatus"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+ ! CHECK-NEXT: %[[cwdBox:.*]] = fir.embox %[[cwdDeclare]] : (!fir.ref<!fir.char<1,255>>) -> !fir.box<!fir.char<1,255>>
+ ! CHECK: %[[cwd:.*]] = fir.convert %[[cwdBox]] : (!fir.box<!fir.char<1,255>>) -> !fir.box<none>
+ ! CHECK: %[[statusValue:.*]] = fir.call @_FortranAGetCwd(%[[cwd]], %[[VAL_8:.*]], %[[c26]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK-NEXT: %[[statusCast:.*]] = fir.convert %[[statusAddr]] : (!fir.ref<i32>) -> i64
+ ! CHECK-NEXT: %[[isPresent:.*]] = arith.cmpi ne, %[[statusCast]], %[[c0]] : i64
+ ! CHECK-NEXT: fir.if %[[isPresent]] {
+ ! CHECK-NEXT: fir.store %[[statusValue]] to %[[statusAddr]] : !fir.ref<i32>
+ ! CHECK-NEXT: }
+ ! CHECK-NEXT: return
+end subroutine all_arguments \ No newline at end of file
diff --git a/flang/test/Lower/OpenMP/critical.f90 b/flang/test/Lower/OpenMP/critical.f90
index d62c58b..c52ae688 100644
--- a/flang/test/Lower/OpenMP/critical.f90
+++ b/flang/test/Lower/OpenMP/critical.f90
@@ -51,3 +51,27 @@ subroutine predetermined_privatization()
end do
!$omp end parallel do
end
+
+! https://github.com/llvm/llvm-project/issues/75767
+!CHECK-LABEL: func @_QPparallel_critical_privatization(
+subroutine parallel_critical_privatization()
+ integer :: i
+
+ !CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_critical_privatizationEi"}
+ !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I]] {uniq_name = "_QFparallel_critical_privatizationEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ !CHECK: omp.parallel {
+ !CHECK: %[[PRIV_I:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFparallel_critical_privatizationEi"}
+ !CHECK: %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFparallel_critical_privatizationEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ !CHECK: %[[TEMP:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+ !CHECK: hlfir.assign %[[TEMP]] to %[[PRIV_I_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+ !$omp parallel default(firstprivate)
+ !CHECK: omp.critical {
+ !$omp critical
+ !CHECK: %[[C200:.*]] = arith.constant 200 : i32
+ !CHECK: hlfir.assign %[[C200]] to %[[PRIV_I_DECL]]#0 : i32, !fir.ref<i32>
+ i = 200
+ !CHECK: }
+ !$omp end critical
+ !CHECK: }
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/map-component-ref.f90 b/flang/test/Lower/OpenMP/map-component-ref.f90
index 2c58266..21b56ab 100644
--- a/flang/test/Lower/OpenMP/map-component-ref.f90
+++ b/flang/test/Lower/OpenMP/map-component-ref.f90
@@ -1,21 +1,22 @@
! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
-! CHECK: %[[V0:[0-9]+]] = fir.alloca !fir.type<_QFfooTt0{a0:i32,a1:i32}> {bindc_name = "a", uniq_name = "_QFfooEa"}
-! CHECK: %[[V1:[0-9]+]]:2 = hlfir.declare %[[V0]] {uniq_name = "_QFfooEa"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>)
-! CHECK: %[[V2:[0-9]+]] = hlfir.designate %[[V1]]#0{"a1"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
+! CHECK-LABEL: func.func @_QPfoo1
+! CHECK: %[[V0:[0-9]+]] = fir.alloca !fir.type<_QFfoo1Tt0{a0:i32,a1:i32}> {bindc_name = "a", uniq_name = "_QFfoo1Ea"}
+! CHECK: %[[V1:[0-9]+]]:2 = hlfir.declare %[[V0]] {uniq_name = "_QFfoo1Ea"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>)
+! CHECK: %[[V2:[0-9]+]] = hlfir.designate %[[V1]]#0{"a1"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
! CHECK: %[[V3:[0-9]+]] = omp.map.info var_ptr(%[[V2]] : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "a%a1"}
-! CHECK: %[[V4:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#1 : !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.type<_QFfooTt0{a0:i32,a1:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[V3]] : [1] : !fir.ref<i32>) -> !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>> {name = "a", partial_map = true}
-! CHECK: omp.target map_entries(%[[V3]] -> %arg0, %[[V4]] -> %arg1 : !fir.ref<i32>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) {
-! CHECK: ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>):
-! CHECK: %[[V5:[0-9]+]]:2 = hlfir.declare %arg1 {uniq_name = "_QFfooEa"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>)
+! CHECK: %[[V4:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#1 : !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[V3]] : [1] : !fir.ref<i32>) -> !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>> {name = "a", partial_map = true}
+! CHECK: omp.target map_entries(%[[V3]] -> %arg0, %[[V4]] -> %arg1 : !fir.ref<i32>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) {
+! CHECK: ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>):
+! CHECK: %[[V5:[0-9]+]]:2 = hlfir.declare %arg1 {uniq_name = "_QFfoo1Ea"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>)
! CHECK: %c0_i32 = arith.constant 0 : i32
-! CHECK: %[[V6:[0-9]+]] = hlfir.designate %[[V5]]#0{"a1"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
+! CHECK: %[[V6:[0-9]+]] = hlfir.designate %[[V5]]#0{"a1"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
! CHECK: hlfir.assign %c0_i32 to %[[V6]] : i32, !fir.ref<i32>
! CHECK: omp.terminator
! CHECK: }
-subroutine foo()
+subroutine foo1()
implicit none
type t0
@@ -29,3 +30,25 @@ subroutine foo()
!$omp end target
end
+
+! CHECK-LABEL: func.func @_QPfoo2
+! CHECK-DAG: omp.map.info var_ptr(%{{[0-9]+}} : {{.*}} map_clauses(to) capture(ByRef) bounds(%{{[0-9]+}}) -> {{.*}} {name = "t%b(1_8)%a(1)"}
+! CHECK-DAG: omp.map.info var_ptr(%{{[0-9]+}} : {{.*}} map_clauses(from) capture(ByRef) bounds(%{{[0-9]+}}) -> {{.*}} {name = "u%b(1_8)%a(1)"}
+subroutine foo2()
+ implicit none
+
+ type t0
+ integer :: a(10)
+ end type
+
+ type t1
+ type(t0) :: b(10)
+ end type
+
+ type(t1) :: t, u
+
+!$omp target map(to: t%b(1)%a(1)) map(from: u%b(1)%a(1))
+ t%b(1)%a(1) = u%b(1)%a(1)
+!$omp end target
+
+end
diff --git a/flang/test/Semantics/getcwd.f90 b/flang/test/Semantics/getcwd.f90
new file mode 100644
index 0000000..b6ff16e
--- /dev/null
+++ b/flang/test/Semantics/getcwd.f90
@@ -0,0 +1,35 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic
+! Tests for the GETCWD intrinsics
+
+subroutine bad_kind_error(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER(2) :: status
+ !ERROR: Actual argument for 'status=' has bad type or kind 'INTEGER(2)'
+ call getcwd(cwd, status)
+end subroutine bad_kind_error
+
+subroutine bad_args_error()
+ !ERROR: missing mandatory 'c=' argument
+ call getcwd()
+end subroutine bad_args_error
+
+subroutine bad_apply_form(cwd)
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ !Declaration of 'getcwd'
+ call getcwd(cwd, status)
+ !ERROR: Cannot call subroutine 'getcwd' like a function
+ status = getcwd(cwd)
+end subroutine bad_apply_form
+
+subroutine good_subroutine(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ call getcwd(cwd, status)
+end subroutine good_subroutine
+
+subroutine good_function(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ status = getcwd(cwd)
+end subroutine good_function \ No newline at end of file
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index b678350..2217a69 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -1,3 +1,13 @@
+if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+ set(extra_entrypoints
+ # stdio.h entrypoints
+ libc.src.stdio.sprintf
+ libc.src.stdio.snprintf
+ libc.src.stdio.vsprintf
+ libc.src.stdio.vsnprintf
+ )
+endif()
+
set(TARGET_LIBC_ENTRYPOINTS
# assert.h entrypoints
libc.src.assert.__assert_fail
@@ -175,6 +185,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.errno.errno
# stdio.h entrypoints
+ ${extra_entrypoints}
libc.src.stdio.feof
libc.src.stdio.ferror
libc.src.stdio.fseek
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index c990a5b..33ecff8 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -505,6 +505,16 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.fabsf16
libc.src.math.fdimf16
libc.src.math.floorf16
+ libc.src.math.fmaxf16
+ libc.src.math.fmaximumf16
+ libc.src.math.fmaximum_magf16
+ libc.src.math.fmaximum_mag_numf16
+ libc.src.math.fmaximum_numf16
+ libc.src.math.fminf16
+ libc.src.math.fminimumf16
+ libc.src.math.fminimum_magf16
+ libc.src.math.fminimum_mag_numf16
+ libc.src.math.fminimum_numf16
libc.src.math.fromfpf16
libc.src.math.fromfpxf16
libc.src.math.llrintf16
@@ -512,6 +522,13 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.lrintf16
libc.src.math.lroundf16
libc.src.math.nearbyintf16
+ libc.src.math.nextafterf16
+ libc.src.math.nextdownf16
+ # Temporarily disable nexttowardf16 on aarch64 because the conversion
+ # between _Float16 and long double will crash clang-11. This is fixed in
+ # clang-12 and after: https://godbolt.org/z/8ceT9454c
+ # libc.src.math.nexttowardf16
+ libc.src.math.nextupf16
libc.src.math.rintf16
libc.src.math.roundf16
libc.src.math.roundevenf16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 780ffb6..e3ca544 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -173,7 +173,6 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdlib.atoll
libc.src.stdlib.bsearch
libc.src.stdlib.div
- libc.src.stdlib.quick_exit
libc.src.stdlib.labs
libc.src.stdlib.ldiv
libc.src.stdlib.llabs
@@ -538,6 +537,16 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.fabsf16
libc.src.math.fdimf16
libc.src.math.floorf16
+ libc.src.math.fmaxf16
+ libc.src.math.fmaximumf16
+ libc.src.math.fmaximum_magf16
+ libc.src.math.fmaximum_mag_numf16
+ libc.src.math.fmaximum_numf16
+ libc.src.math.fminf16
+ libc.src.math.fminimumf16
+ libc.src.math.fminimum_magf16
+ libc.src.math.fminimum_mag_numf16
+ libc.src.math.fminimum_numf16
libc.src.math.fromfpf16
libc.src.math.fromfpxf16
libc.src.math.llrintf16
@@ -545,6 +554,10 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.lrintf16
libc.src.math.lroundf16
libc.src.math.nearbyintf16
+ libc.src.math.nextafterf16
+ libc.src.math.nextdownf16
+ libc.src.math.nexttowardf16
+ libc.src.math.nextupf16
libc.src.math.rintf16
libc.src.math.roundf16
libc.src.math.roundevenf16
@@ -756,9 +769,11 @@ if(LLVM_LIBC_FULL_BUILD)
# stdlib.h entrypoints
libc.src.stdlib._Exit
libc.src.stdlib.abort
+ libc.src.stdlib.at_quick_exit
libc.src.stdlib.atexit
libc.src.stdlib.exit
libc.src.stdlib.getenv
+ libc.src.stdlib.quick_exit
# signal.h entrypoints
libc.src.signal.raise
diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst
index 5bbb056..71232cc 100644
--- a/libc/docs/c23.rst
+++ b/libc/docs/c23.rst
@@ -59,15 +59,17 @@ Additions:
* ufromfp* |check|
* fromfpx* |check|
* ufromfpx* |check|
- * nextup*
- * nextdown*
+ * nextup* |check|
+ * nextdown* |check|
* canonicalize* |check|
- * fmaximum*
- * fminimum*
- * fmaximum_mag*
- * fminimum_mag*
- * fmaximum_mag_num*
- * fminimum_mag_num*
+ * fmaximum* |check|
+ * fminimum* |check|
+ * fmaximum_mag* |check|
+ * fminimum_mag* |check|
+ * fmaximum_num* |check|
+ * fminimum_num* |check|
+ * fmaximum_mag_num* |check|
+ * fminimum_mag_num* |check|
* fadd*
* fsub*
* fmul*
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index fd75374..b9507f0 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -136,25 +136,25 @@ Basic Operations
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| floor | |check| | |check| | |check| | |check| | |check| | 7.12.9.2 | F.10.6.2 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmax | |check| | |check| | |check| | | |check| | 7.12.12.2 | F.10.9.2 |
+| fmax | |check| | |check| | |check| | |check| | |check| | 7.12.12.2 | F.10.9.2 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmaximum | |check| | |check| | |check| | | |check| | 7.12.12.4 | F.10.9.4 |
+| fmaximum | |check| | |check| | |check| | |check| | |check| | 7.12.12.4 | F.10.9.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmaximum_mag | |check| | |check| | |check| | | |check| | 7.12.12.6 | F.10.9.4 |
+| fmaximum_mag | |check| | |check| | |check| | |check| | |check| | 7.12.12.6 | F.10.9.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmaximum_mag_num | |check| | |check| | |check| | | |check| | 7.12.12.10 | F.10.9.5 |
+| fmaximum_mag_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.10 | F.10.9.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmaximum_num | |check| | |check| | |check| | | |check| | 7.12.12.8 | F.10.9.5 |
+| fmaximum_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.8 | F.10.9.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmin | |check| | |check| | |check| | | |check| | 7.12.12.3 | F.10.9.3 |
+| fmin | |check| | |check| | |check| | |check| | |check| | 7.12.12.3 | F.10.9.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fminimum | |check| | |check| | |check| | | |check| | 7.12.12.5 | F.10.9.4 |
+| fminimum | |check| | |check| | |check| | |check| | |check| | 7.12.12.5 | F.10.9.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fminimum_mag | |check| | |check| | |check| | | |check| | 7.12.12.7 | F.10.9.4 |
+| fminimum_mag | |check| | |check| | |check| | |check| | |check| | 7.12.12.7 | F.10.9.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fminimum_mag_num | |check| | |check| | |check| | | |check| | 7.12.12.11 | F.10.9.5 |
+| fminimum_mag_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.11 | F.10.9.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fminimum_num | |check| | |check| | |check| | | |check| | 7.12.12.9 | F.10.9.5 |
+| fminimum_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.9 | F.10.9.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fmod | |check| | |check| | |check| | | |check| | 7.12.10.1 | F.10.7.1 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
@@ -190,13 +190,13 @@ Basic Operations
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| nearbyint | |check| | |check| | |check| | |check| | |check| | 7.12.9.3 | F.10.6.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nextafter | |check| | |check| | |check| | | |check| | 7.12.11.3 | F.10.8.3 |
+| nextafter | |check| | |check| | |check| | |check| | |check| | 7.12.11.3 | F.10.8.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nextdown | |check| | |check| | |check| | | |check| | 7.12.11.6 | F.10.8.6 |
+| nextdown | |check| | |check| | |check| | |check| | |check| | 7.12.11.6 | F.10.8.6 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nexttoward | |check| | |check| | |check| | | N/A | 7.12.11.4 | F.10.8.4 |
+| nexttoward | |check| | |check| | |check| | |check| | N/A | 7.12.11.4 | F.10.8.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nextup | |check| | |check| | |check| | | |check| | 7.12.11.5 | F.10.8.5 |
+| nextup | |check| | |check| | |check| | |check| | |check| | 7.12.11.5 | F.10.8.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| remainder | |check| | |check| | |check| | | | 7.12.10.2 | F.10.7.2 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index 8e87642..9b3373a 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -117,3 +117,12 @@ add_proxy_header_library(
libc.include.llvm-libc-types.pid_t
libc.include.sys_types
)
+
+add_proxy_header_library(
+ atexithandler_t
+ HDRS
+ atexithandler_t.h
+ FULL_BUILD_DEPENDS
+ libc.include.llvm-libc-types.atexithandler_t
+ libc.include.stdlib
+)
diff --git a/libc/hdr/types/atexithandler_t.h b/libc/hdr/types/atexithandler_t.h
new file mode 100644
index 0000000..4275e44
--- /dev/null
+++ b/libc/hdr/types/atexithandler_t.h
@@ -0,0 +1,22 @@
+//===-- Definition of macros from atexithandler_t.h -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_ATEXITHANDLER_T_H
+#define LLVM_LIBC_HDR_ATEXITHANDLER_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/__atexithandler_t.h"
+
+#else // overlay mode
+
+#error // type not available in overlay mode
+
+#endif // LLVM_LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_ATEXITHANDLER_T_H
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 0aadeb1..9a436c8 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -416,50 +416,60 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"fminf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fminl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
GuardedFunctionSpec<"fminf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
+ GuardedFunctionSpec<"fminf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
FunctionSpec<"fmax", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaxf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaxl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
GuardedFunctionSpec<"fmaxf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
+ GuardedFunctionSpec<"fmaxf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
FunctionSpec<"fmaximum", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaximumf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximuml", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fmaximumf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fmaximumf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fmaximum_num", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaximum_numf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximum_numl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fmaximum_numf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fmaximum_numf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fmaximum_mag", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaximum_magf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximum_magl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fmaximum_magf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fmaximum_magf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fmaximum_mag_num", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaximum_mag_numf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximum_mag_numl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fmaximum_mag_numf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fmaximum_mag_numf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fminimum", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fminimumf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fminimuml", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fminimumf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fminimumf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fminimum_num", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fminimum_numf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximum_numl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fminimum_numf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fminimum_numf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fminimum_mag", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fminimum_magf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fminimum_magl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fminimum_magf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fminimum_magf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fminimum_mag_num", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fminimum_mag_numf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fminimum_mag_numl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fminimum_mag_numf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fminimum_mag_numf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fma", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
@@ -632,20 +642,24 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"nextafterf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"nextafter", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"nextafterl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"nextafterf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"nextafterf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"nexttowardf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<LongDoubleType>]>,
FunctionSpec<"nexttoward", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<LongDoubleType>]>,
FunctionSpec<"nexttowardl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"nexttowardf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
FunctionSpec<"nextdown", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"nextdownf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
FunctionSpec<"nextdownl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"nextdownf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"nextdownf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"nextup", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"nextupf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
FunctionSpec<"nextupl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"nextupf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"nextupf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"powf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
@@ -1093,8 +1107,9 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"free", RetValSpec<VoidType>, [ArgSpec<VoidPtr>]>,
FunctionSpec<"_Exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
- FunctionSpec<"exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
+ FunctionSpec<"at_quick_exit", RetValSpec<IntType>, [ArgSpec<AtexitHandlerT>]>,
FunctionSpec<"atexit", RetValSpec<IntType>, [ArgSpec<AtexitHandlerT>]>,
+ FunctionSpec<"exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
FunctionSpec<"quick_exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
]
>;
diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt
index 08661ab..e6f58b7 100644
--- a/libc/src/__support/CPP/CMakeLists.txt
+++ b/libc/src/__support/CPP/CMakeLists.txt
@@ -111,6 +111,7 @@ add_header_library(
type_traits/add_lvalue_reference.h
type_traits/add_pointer.h
type_traits/add_rvalue_reference.h
+ type_traits/aligned_storage.h
type_traits/always_false.h
type_traits/bool_constant.h
type_traits/conditional.h
diff --git a/libc/src/__support/CPP/type_traits.h b/libc/src/__support/CPP/type_traits.h
index 1494aeb..d50b661 100644
--- a/libc/src/__support/CPP/type_traits.h
+++ b/libc/src/__support/CPP/type_traits.h
@@ -12,6 +12,7 @@
#include "src/__support/CPP/type_traits/add_lvalue_reference.h"
#include "src/__support/CPP/type_traits/add_pointer.h"
#include "src/__support/CPP/type_traits/add_rvalue_reference.h"
+#include "src/__support/CPP/type_traits/aligned_storage.h"
#include "src/__support/CPP/type_traits/bool_constant.h"
#include "src/__support/CPP/type_traits/conditional.h"
#include "src/__support/CPP/type_traits/decay.h"
diff --git a/libc/src/__support/CPP/type_traits/aligned_storage.h b/libc/src/__support/CPP/type_traits/aligned_storage.h
new file mode 100644
index 0000000..574b114
--- /dev/null
+++ b/libc/src/__support/CPP/type_traits/aligned_storage.h
@@ -0,0 +1,27 @@
+//===-- aligned_storage type_traits --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_ALIGNED_STORAGE_H
+#define LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_ALIGNED_STORAGE_H
+
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE::cpp {
+
+template <size_t Len, size_t Align> struct aligned_storage {
+ struct type {
+ alignas(Align) unsigned char data[Len];
+ };
+};
+
+template <size_t Len, size_t Align>
+using aligned_storage_t = typename aligned_storage<Len, Align>::type;
+
+} // namespace LIBC_NAMESPACE::cpp
+
+#endif // LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_ALIGNED_STORAGE_H
diff --git a/libc/src/__support/fixedvector.h b/libc/src/__support/fixedvector.h
index 6aeb4d5..ddd0993 100644
--- a/libc/src/__support/fixedvector.h
+++ b/libc/src/__support/fixedvector.h
@@ -24,6 +24,17 @@ template <typename T, size_t CAPACITY> class FixedVector {
public:
constexpr FixedVector() = default;
+ using iterator = typename cpp::array<T, CAPACITY>::iterator;
+ constexpr FixedVector(iterator begin, iterator end) {
+ for (; begin != end; ++begin)
+ push_back(*begin);
+ }
+
+ constexpr FixedVector(size_t count, const T &value) {
+ for (size_t i = 0; i < count; ++i)
+ push_back(value);
+ }
+
bool push_back(const T &obj) {
if (item_count == CAPACITY)
return false;
@@ -43,8 +54,14 @@ public:
return true;
}
+ T &operator[](size_t idx) { return store[idx]; }
+
+ const T &operator[](size_t idx) const { return store[idx]; }
+
bool empty() const { return item_count == 0; }
+ size_t size() const { return item_count; }
+
// Empties the store for all practical purposes.
void reset() { item_count = 0; }
@@ -64,7 +81,6 @@ public:
}
LIBC_INLINE constexpr reverse_iterator rend() { return store.rend(); }
- using iterator = typename cpp::array<T, CAPACITY>::iterator;
LIBC_INLINE constexpr iterator begin() { return store.begin(); }
LIBC_INLINE constexpr iterator end() { return iterator{&store[item_count]}; }
};
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 5ae03b1..7a349dd 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -124,50 +124,60 @@ add_math_entrypoint_object(fmax)
add_math_entrypoint_object(fmaxf)
add_math_entrypoint_object(fmaxl)
add_math_entrypoint_object(fmaxf128)
+add_math_entrypoint_object(fmaxf16)
add_math_entrypoint_object(fmin)
add_math_entrypoint_object(fminf)
add_math_entrypoint_object(fminl)
add_math_entrypoint_object(fminf128)
+add_math_entrypoint_object(fminf16)
add_math_entrypoint_object(fmaximum)
add_math_entrypoint_object(fmaximumf)
add_math_entrypoint_object(fmaximuml)
+add_math_entrypoint_object(fmaximumf16)
add_math_entrypoint_object(fmaximumf128)
add_math_entrypoint_object(fmaximum_num)
add_math_entrypoint_object(fmaximum_numf)
add_math_entrypoint_object(fmaximum_numl)
+add_math_entrypoint_object(fmaximum_numf16)
add_math_entrypoint_object(fmaximum_numf128)
add_math_entrypoint_object(fmaximum_mag)
add_math_entrypoint_object(fmaximum_magf)
add_math_entrypoint_object(fmaximum_magl)
+add_math_entrypoint_object(fmaximum_magf16)
add_math_entrypoint_object(fmaximum_magf128)
add_math_entrypoint_object(fmaximum_mag_num)
add_math_entrypoint_object(fmaximum_mag_numf)
add_math_entrypoint_object(fmaximum_mag_numl)
+add_math_entrypoint_object(fmaximum_mag_numf16)
add_math_entrypoint_object(fmaximum_mag_numf128)
add_math_entrypoint_object(fminimum)
add_math_entrypoint_object(fminimumf)
add_math_entrypoint_object(fminimuml)
+add_math_entrypoint_object(fminimumf16)
add_math_entrypoint_object(fminimumf128)
add_math_entrypoint_object(fminimum_num)
add_math_entrypoint_object(fminimum_numf)
add_math_entrypoint_object(fminimum_numl)
+add_math_entrypoint_object(fminimum_numf16)
add_math_entrypoint_object(fminimum_numf128)
add_math_entrypoint_object(fminimum_mag)
add_math_entrypoint_object(fminimum_magf)
add_math_entrypoint_object(fminimum_magl)
+add_math_entrypoint_object(fminimum_magf16)
add_math_entrypoint_object(fminimum_magf128)
add_math_entrypoint_object(fminimum_mag_num)
add_math_entrypoint_object(fminimum_mag_numf)
add_math_entrypoint_object(fminimum_mag_numl)
+add_math_entrypoint_object(fminimum_mag_numf16)
add_math_entrypoint_object(fminimum_mag_numf128)
add_math_entrypoint_object(fmod)
@@ -270,20 +280,24 @@ add_math_entrypoint_object(nearbyintf128)
add_math_entrypoint_object(nextafter)
add_math_entrypoint_object(nextafterf)
add_math_entrypoint_object(nextafterl)
+add_math_entrypoint_object(nextafterf16)
add_math_entrypoint_object(nextafterf128)
add_math_entrypoint_object(nexttoward)
add_math_entrypoint_object(nexttowardf)
add_math_entrypoint_object(nexttowardl)
+add_math_entrypoint_object(nexttowardf16)
add_math_entrypoint_object(nextdown)
add_math_entrypoint_object(nextdownf)
add_math_entrypoint_object(nextdownl)
+add_math_entrypoint_object(nextdownf16)
add_math_entrypoint_object(nextdownf128)
add_math_entrypoint_object(nextup)
add_math_entrypoint_object(nextupf)
add_math_entrypoint_object(nextupl)
+add_math_entrypoint_object(nextupf16)
add_math_entrypoint_object(nextupf128)
add_math_entrypoint_object(pow)
diff --git a/libc/src/math/fmaxf16.h b/libc/src/math/fmaxf16.h
new file mode 100644
index 0000000..bf608f8
--- /dev/null
+++ b/libc/src/math/fmaxf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaxf16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaxf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXF16_H
diff --git a/libc/src/math/fmaximum_mag_numf16.h b/libc/src/math/fmaximum_mag_numf16.h
new file mode 100644
index 0000000..4c963d4
--- /dev/null
+++ b/libc/src/math/fmaximum_mag_numf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaximum_mag_numf16 -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaximum_mag_numf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMF16_H
diff --git a/libc/src/math/fmaximum_magf16.h b/libc/src/math/fmaximum_magf16.h
new file mode 100644
index 0000000..e5f57d3
--- /dev/null
+++ b/libc/src/math/fmaximum_magf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaximum_magf16 ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaximum_magf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGF16_H
diff --git a/libc/src/math/fmaximum_numf16.h b/libc/src/math/fmaximum_numf16.h
new file mode 100644
index 0000000..b450a45
--- /dev/null
+++ b/libc/src/math/fmaximum_numf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaximum_numf16 ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaximum_numf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMF16_H
diff --git a/libc/src/math/fmaximumf16.h b/libc/src/math/fmaximumf16.h
new file mode 100644
index 0000000..806339f
--- /dev/null
+++ b/libc/src/math/fmaximumf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaximumf16 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXIMUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaximumf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXIMUMF16_H
diff --git a/libc/src/math/fminf16.h b/libc/src/math/fminf16.h
new file mode 100644
index 0000000..22d4e6c
--- /dev/null
+++ b/libc/src/math/fminf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminf16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINF16_H
+#define LLVM_LIBC_SRC_MATH_FMINF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINF16_H
diff --git a/libc/src/math/fminimum_mag_numf16.h b/libc/src/math/fminimum_mag_numf16.h
new file mode 100644
index 0000000..0fd314b
--- /dev/null
+++ b/libc/src/math/fminimum_mag_numf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminimum_mag_numf16 -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminimum_mag_numf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMF16_H
diff --git a/libc/src/math/fminimum_magf16.h b/libc/src/math/fminimum_magf16.h
new file mode 100644
index 0000000..27673555
--- /dev/null
+++ b/libc/src/math/fminimum_magf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminimum_magf16 ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_MAGF16_H
+#define LLVM_LIBC_SRC_MATH_FMINIMUM_MAGF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminimum_magf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_MAGF16_H
diff --git a/libc/src/math/fminimum_numf16.h b/libc/src/math/fminimum_numf16.h
new file mode 100644
index 0000000..598ff9d
--- /dev/null
+++ b/libc/src/math/fminimum_numf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminimum_numf16 ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_NUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMINIMUM_NUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminimum_numf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_NUMF16_H
diff --git a/libc/src/math/fminimumf16.h b/libc/src/math/fminimumf16.h
new file mode 100644
index 0000000..86dd240
--- /dev/null
+++ b/libc/src/math/fminimumf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminimumf16 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINIMUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMINIMUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminimumf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINIMUMF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 95904be..b1d786f 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1783,6 +1783,20 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminf16
+ SRCS
+ fminf16.cpp
+ HDRS
+ ../fminf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+
+add_entrypoint_object(
fmax
SRCS
fmax.cpp
@@ -1832,6 +1846,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaxf16
+ SRCS
+ fmaxf16.cpp
+ HDRS
+ ../fmaxf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximum
SRCS
fmaximum.cpp
@@ -1868,6 +1895,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaximumf16
+ SRCS
+ fmaximumf16.cpp
+ HDRS
+ ../fmaximumf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximumf128
SRCS
fmaximumf128.cpp
@@ -1917,6 +1957,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaximum_numf16
+ SRCS
+ fmaximum_numf16.cpp
+ HDRS
+ ../fmaximum_numf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximum_numf128
SRCS
fmaximum_numf128.cpp
@@ -1966,6 +2019,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaximum_magf16
+ SRCS
+ fmaximum_magf16.cpp
+ HDRS
+ ../fmaximum_magf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximum_magf128
SRCS
fmaximum_magf128.cpp
@@ -1978,7 +2044,6 @@ add_entrypoint_object(
-O3
)
-
add_entrypoint_object(
fmaximum_mag_num
SRCS
@@ -2016,6 +2081,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaximum_mag_numf16
+ SRCS
+ fmaximum_mag_numf16.cpp
+ HDRS
+ ../fmaximum_mag_numf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximum_mag_numf128
SRCS
fmaximum_mag_numf128.cpp
@@ -2065,6 +2143,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminimumf16
+ SRCS
+ fminimumf16.cpp
+ HDRS
+ ../fminimumf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fminimumf128
SRCS
fminimumf128.cpp
@@ -2114,6 +2205,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminimum_numf16
+ SRCS
+ fminimum_numf16.cpp
+ HDRS
+ ../fminimum_numf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fminimum_numf128
SRCS
fminimum_numf128.cpp
@@ -2163,6 +2267,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminimum_magf16
+ SRCS
+ fminimum_magf16.cpp
+ HDRS
+ ../fminimum_magf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fminimum_magf128
SRCS
fminimum_magf128.cpp
@@ -2175,7 +2292,6 @@ add_entrypoint_object(
-O3
)
-
add_entrypoint_object(
fminimum_mag_num
SRCS
@@ -2213,6 +2329,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminimum_mag_numf16
+ SRCS
+ fminimum_mag_numf16.cpp
+ HDRS
+ ../fminimum_mag_numf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fminimum_mag_numf128
SRCS
fminimum_mag_numf128.cpp
@@ -2524,6 +2653,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ nextafterf16
+ SRCS
+ nextafterf16.cpp
+ HDRS
+ ../nextafterf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
nextafterf128
SRCS
nextafterf128.cpp
@@ -2573,6 +2715,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ nexttowardf16
+ SRCS
+ nexttowardf16.cpp
+ HDRS
+ ../nexttowardf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
nextdown
SRCS
nextdown.cpp
@@ -2609,6 +2764,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ nextdownf16
+ SRCS
+ nextdownf16.cpp
+ HDRS
+ ../nextdownf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
nextdownf128
SRCS
nextdownf128.cpp
@@ -2658,6 +2826,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ nextupf16
+ SRCS
+ nextupf16.cpp
+ HDRS
+ ../nextupf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
nextupf128
SRCS
nextupf128.cpp
diff --git a/libc/src/math/generic/fmaxf16.cpp b/libc/src/math/generic/fmaxf16.cpp
new file mode 100644
index 0000000..c317aef
--- /dev/null
+++ b/libc/src/math/generic/fmaxf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaxf16 function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaxf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaxf16, (float16 x, float16 y)) {
+ return fputil::fmax(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaximum_mag_numf16.cpp b/libc/src/math/generic/fmaximum_mag_numf16.cpp
new file mode 100644
index 0000000..5055802
--- /dev/null
+++ b/libc/src/math/generic/fmaximum_mag_numf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaximum_mag_numf16 function --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaximum_mag_numf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaximum_mag_numf16, (float16 x, float16 y)) {
+ return fputil::fmaximum_mag_num(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaximum_magf16.cpp b/libc/src/math/generic/fmaximum_magf16.cpp
new file mode 100644
index 0000000..fbd5eac
--- /dev/null
+++ b/libc/src/math/generic/fmaximum_magf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaximum_magf16 function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaximum_magf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaximum_magf16, (float16 x, float16 y)) {
+ return fputil::fmaximum_mag(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
new file mode 100644
index 0000000..187cfbe
--- /dev/null
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaximum_numf16 function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaximum_numf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
+ return fputil::fmaximum_num(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaximumf16.cpp b/libc/src/math/generic/fmaximumf16.cpp
new file mode 100644
index 0000000..9e194d2ece
--- /dev/null
+++ b/libc/src/math/generic/fmaximumf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaximumf16 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaximumf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaximumf16, (float16 x, float16 y)) {
+ return fputil::fmaximum(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminf16.cpp b/libc/src/math/generic/fminf16.cpp
new file mode 100644
index 0000000..12547c3
--- /dev/null
+++ b/libc/src/math/generic/fminf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminf16 function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminf16, (float16 x, float16 y)) {
+ return fputil::fmin(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminimum_mag_numf16.cpp b/libc/src/math/generic/fminimum_mag_numf16.cpp
new file mode 100644
index 0000000..1a893c6
--- /dev/null
+++ b/libc/src/math/generic/fminimum_mag_numf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminimum_mag_numf16 function --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminimum_mag_numf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminimum_mag_numf16, (float16 x, float16 y)) {
+ return fputil::fminimum_mag_num(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminimum_magf16.cpp b/libc/src/math/generic/fminimum_magf16.cpp
new file mode 100644
index 0000000..45183a9
--- /dev/null
+++ b/libc/src/math/generic/fminimum_magf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminimum_magf16 function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminimum_magf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminimum_magf16, (float16 x, float16 y)) {
+ return fputil::fminimum_mag(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
new file mode 100644
index 0000000..825ad3e
--- /dev/null
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminimum_numf16 function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminimum_numf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
+ return fputil::fminimum_num(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminimumf16.cpp b/libc/src/math/generic/fminimumf16.cpp
new file mode 100644
index 0000000..16f738b
--- /dev/null
+++ b/libc/src/math/generic/fminimumf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminimumf16 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminimumf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminimumf16, (float16 x, float16 y)) {
+ return fputil::fminimum(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextafterf16.cpp b/libc/src/math/generic/nextafterf16.cpp
new file mode 100644
index 0000000..144b3fc
--- /dev/null
+++ b/libc/src/math/generic/nextafterf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextafterf16 function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextafterf16.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, nextafterf16, (float16 x, float16 y)) {
+ return fputil::nextafter(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextdownf16.cpp b/libc/src/math/generic/nextdownf16.cpp
new file mode 100644
index 0000000..9fdaa9d
--- /dev/null
+++ b/libc/src/math/generic/nextdownf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextdownf16 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextdownf16.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, nextdownf16, (float16 x)) {
+ return fputil::nextupdown</*IsDown=*/true>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nexttowardf16.cpp b/libc/src/math/generic/nexttowardf16.cpp
new file mode 100644
index 0000000..d1d78e8
--- /dev/null
+++ b/libc/src/math/generic/nexttowardf16.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of nexttowardf16 function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nexttowardf16.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, nexttowardf16, (float16 x, long double y)) {
+ // We can reuse the nextafter implementation because the internal nextafter is
+ // templated on the types of the arguments.
+ return fputil::nextafter(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextupf16.cpp b/libc/src/math/generic/nextupf16.cpp
new file mode 100644
index 0000000..5d3d52c
--- /dev/null
+++ b/libc/src/math/generic/nextupf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextupf16 function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextupf16.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, nextupf16, (float16 x)) {
+ return fputil::nextupdown</*IsDown=*/false>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/nextafterf16.h b/libc/src/math/nextafterf16.h
new file mode 100644
index 0000000..293569e
--- /dev/null
+++ b/libc/src/math/nextafterf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nextafterf16 ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTAFTERF16_H
+#define LLVM_LIBC_SRC_MATH_NEXTAFTERF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 nextafterf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTAFTERF16_H
diff --git a/libc/src/math/nextdownf16.h b/libc/src/math/nextdownf16.h
new file mode 100644
index 0000000..1913757
--- /dev/null
+++ b/libc/src/math/nextdownf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nextdownf16 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTDOWNF16_H
+#define LLVM_LIBC_SRC_MATH_NEXTDOWNF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 nextdownf16(float16 x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTDOWNF16_H
diff --git a/libc/src/math/nexttowardf16.h b/libc/src/math/nexttowardf16.h
new file mode 100644
index 0000000..604eb32
--- /dev/null
+++ b/libc/src/math/nexttowardf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nexttowardf16 -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTTOWARDF16_H
+#define LLVM_LIBC_SRC_MATH_NEXTTOWARDF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 nexttowardf16(float16 x, long double y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTTOWARDF16_H
diff --git a/libc/src/math/nextupf16.h b/libc/src/math/nextupf16.h
new file mode 100644
index 0000000..b2973e4
--- /dev/null
+++ b/libc/src/math/nextupf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nextupf16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTUPF16_H
+#define LLVM_LIBC_SRC_MATH_NEXTUPF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 nextupf16(float16 x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTUPF16_H
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index e0bff51..219c85d 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -50,6 +50,7 @@ add_entrypoint_object(
quick_exit.h
DEPENDS
libc.src.__support.OSUtil.osutil
+ .exit_handler
)
add_entrypoint_object(
@@ -415,14 +416,14 @@ add_entrypoint_object(
libc.src.__support.OSUtil.osutil
)
-add_entrypoint_object(
- atexit
+add_object_library(
+ exit_handler
SRCS
- atexit.cpp
+ exit_handler.cpp
HDRS
- atexit.h
+ exit_handler.h
CXX_STANDARD
- 20 # For constinit of the atexit callback list.
+ 20 # For constinit
DEPENDS
libc.src.__support.CPP.mutex
libc.src.__support.CPP.new
@@ -433,6 +434,26 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ atexit
+ SRCS
+ atexit.cpp
+ HDRS
+ atexit.h
+ DEPENDS
+ .exit_handler
+)
+
+add_entrypoint_object(
+ at_quick_exit
+ SRCS
+ at_quick_exit.cpp
+ HDRS
+ at_quick_exit.h
+ DEPENDS
+ .exit_handler
+)
+
+add_entrypoint_object(
exit
SRCS
exit.cpp
@@ -442,6 +463,7 @@ add_entrypoint_object(
._Exit
.atexit
libc.src.__support.OSUtil.osutil
+ .exit_handler
)
add_entrypoint_object(
diff --git a/libc/src/stdlib/at_quick_exit.cpp b/libc/src/stdlib/at_quick_exit.cpp
new file mode 100644
index 0000000..752d67e
--- /dev/null
+++ b/libc/src/stdlib/at_quick_exit.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of at_quick_exit -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/at_quick_exit.h"
+#include "hdr/types/atexithandler_t.h"
+#include "src/__support/common.h"
+#include "src/stdlib/exit_handler.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, at_quick_exit, (__atexithandler_t callback)) {
+ return add_atexit_unit(
+ at_quick_exit_callbacks,
+ {&stdc_at_exit_func, reinterpret_cast<void *>(callback)});
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/at_quick_exit.h b/libc/src/stdlib/at_quick_exit.h
new file mode 100644
index 0000000..c36c797
--- /dev/null
+++ b/libc/src/stdlib/at_quick_exit.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for at_quick_exit -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_AT_QUICK_EXIT_H
+#define LLVM_LIBC_SRC_STDLIB_AT_QUICK_EXIT_H
+
+#include "hdr/types/atexithandler_t.h"
+
+namespace LIBC_NAMESPACE {
+
+int at_quick_exit(__atexithandler_t);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STDLIB_AT_QUICK_EXIT_H
diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp
index 9e37c4c..ca3cbfe 100644
--- a/libc/src/stdlib/atexit.cpp
+++ b/libc/src/stdlib/atexit.cpp
@@ -7,95 +7,28 @@
//===----------------------------------------------------------------------===//
#include "src/stdlib/atexit.h"
-#include "src/__support/CPP/mutex.h" // lock_guard
-#include "src/__support/blockstore.h"
+#include "hdr/types/atexithandler_t.h"
#include "src/__support/common.h"
-#include "src/__support/fixedvector.h"
-#include "src/__support/threads/mutex.h"
+#include "src/stdlib/exit_handler.h"
namespace LIBC_NAMESPACE {
-namespace {
-
-Mutex handler_list_mtx(/*timed=*/false, /*recursive=*/false, /*robust=*/false,
- /*pshared=*/false);
-
-using AtExitCallback = void(void *);
-using StdCAtExitCallback = void(void);
-
-struct AtExitUnit {
- AtExitCallback *callback = nullptr;
- void *payload = nullptr;
- constexpr AtExitUnit() = default;
- constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {}
-};
-
-#if defined(LIBC_TARGET_ARCH_IS_GPU)
-// The GPU build cannot handle the potentially recursive definitions required by
-// the BlockStore class. Additionally, the liklihood that someone exceeds this
-// while executing on the GPU is extremely small.
-// FIXME: It is not generally safe to use 'atexit' on the GPU because the
-// mutexes simply passthrough. We will need a lock free stack.
-using ExitCallbackList = FixedVector<AtExitUnit, 64>;
-#elif defined(LIBC_COPT_PUBLIC_PACKAGING)
-using ExitCallbackList = ReverseOrderBlockStore<AtExitUnit, 32>;
-#else
-// BlockStore uses dynamic memory allocation. To avoid dynamic memory
-// allocation in tests, we use a fixed size callback list when built for
-// tests.
-// If we use BlockStore, then we will have to pull in malloc etc into
-// the tests. While this is not bad, the problem we have currently is
-// that LLVM libc' allocator is SCUDO. So, we will end up pulling SCUDO's
-// deps also (some of which are not yet available in LLVM libc) into the
-// integration tests.
-using ExitCallbackList = FixedVector<AtExitUnit, CALLBACK_LIST_SIZE_FOR_TESTS>;
-#endif // LIBC_COPT_PUBLIC_PACKAGING
-
-constinit ExitCallbackList exit_callbacks;
-
-void stdc_at_exit_func(void *payload) {
- reinterpret_cast<StdCAtExitCallback *>(payload)();
-}
-
-void call_exit_callbacks() {
- handler_list_mtx.lock();
- while (!exit_callbacks.empty()) {
- AtExitUnit &unit = exit_callbacks.back();
- exit_callbacks.pop_back();
- handler_list_mtx.unlock();
- unit.callback(unit.payload);
- handler_list_mtx.lock();
- }
- ExitCallbackList::destroy(&exit_callbacks);
-}
-
-int add_atexit_unit(const AtExitUnit &unit) {
- cpp::lock_guard lock(handler_list_mtx);
- if (exit_callbacks.push_back(unit))
- return 0;
- return -1;
-}
-
-} // namespace
-
extern "C" {
-// TODO: Handle the last dso handle argument.
int __cxa_atexit(AtExitCallback *callback, void *payload, void *) {
- return add_atexit_unit({callback, payload});
+ return add_atexit_unit(atexit_callbacks, {callback, payload});
}
-// TODO: Handle the dso handle argument. call_exit_callbacks should only invoke
-// the callbacks from this DSO. Requires adding support for __dso_handle.
void __cxa_finalize(void *dso) {
if (!dso)
- call_exit_callbacks();
+ call_exit_callbacks(atexit_callbacks);
}
} // extern "C"
-LLVM_LIBC_FUNCTION(int, atexit, (StdCAtExitCallback * callback)) {
+LLVM_LIBC_FUNCTION(int, atexit, (__atexithandler_t callback)) {
return add_atexit_unit(
+ atexit_callbacks,
{&stdc_at_exit_func, reinterpret_cast<void *>(callback)});
}
diff --git a/libc/src/stdlib/atexit.h b/libc/src/stdlib/atexit.h
index 7cf9d7c..7faaf65 100644
--- a/libc/src/stdlib/atexit.h
+++ b/libc/src/stdlib/atexit.h
@@ -9,13 +9,10 @@
#ifndef LLVM_LIBC_SRC_STDLIB_ATEXIT_H
#define LLVM_LIBC_SRC_STDLIB_ATEXIT_H
-#include <stddef.h> // For size_t
-
+#include "hdr/types/atexithandler_t.h"
namespace LIBC_NAMESPACE {
-constexpr size_t CALLBACK_LIST_SIZE_FOR_TESTS = 1024;
-
-int atexit(void (*function)());
+int atexit(__atexithandler_t);
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/exit_handler.cpp b/libc/src/stdlib/exit_handler.cpp
new file mode 100644
index 0000000..ed41247
--- /dev/null
+++ b/libc/src/stdlib/exit_handler.cpp
@@ -0,0 +1,42 @@
+//===--- Implementation of exit_handler------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/exit_handler.h"
+#include "src/__support/CPP/mutex.h" // lock_guard
+
+namespace LIBC_NAMESPACE {
+
+constinit ExitCallbackList at_quick_exit_callbacks;
+constinit ExitCallbackList atexit_callbacks;
+
+Mutex handler_list_mtx(false, false, false, false);
+
+void stdc_at_exit_func(void *payload) {
+ reinterpret_cast<StdCAtExitCallback *>(payload)();
+}
+
+void call_exit_callbacks(ExitCallbackList &callbacks) {
+ handler_list_mtx.lock();
+ while (!callbacks.empty()) {
+ AtExitUnit &unit = callbacks.back();
+ callbacks.pop_back();
+ handler_list_mtx.unlock();
+ unit.callback(unit.payload);
+ handler_list_mtx.lock();
+ }
+ ExitCallbackList::destroy(&callbacks);
+}
+
+int add_atexit_unit(ExitCallbackList &callbacks, const AtExitUnit &unit) {
+ cpp::lock_guard lock(handler_list_mtx);
+ if (callbacks.push_back(unit))
+ return 0;
+ return -1;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/exit_handler.h b/libc/src/stdlib/exit_handler.h
new file mode 100644
index 0000000..8494c2f
--- /dev/null
+++ b/libc/src/stdlib/exit_handler.h
@@ -0,0 +1,53 @@
+//===-- Implementation header for exit_handler ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_EXIT_HANDLER_H
+#define LLVM_LIBC_SRC_STDLIB_EXIT_HANDLER_H
+
+#include "src/__support/CPP/mutex.h" // lock_guard
+#include "src/__support/blockstore.h"
+#include "src/__support/common.h"
+#include "src/__support/fixedvector.h"
+#include "src/__support/threads/mutex.h"
+
+namespace LIBC_NAMESPACE {
+
+using AtExitCallback = void(void *);
+using StdCAtExitCallback = void(void);
+constexpr size_t CALLBACK_LIST_SIZE_FOR_TESTS = 1024;
+
+struct AtExitUnit {
+ AtExitCallback *callback = nullptr;
+ void *payload = nullptr;
+ LIBC_INLINE constexpr AtExitUnit() = default;
+ LIBC_INLINE constexpr AtExitUnit(AtExitCallback *c, void *p)
+ : callback(c), payload(p) {}
+};
+
+#if defined(LIBC_TARGET_ARCH_IS_GPU)
+using ExitCallbackList = FixedVector<AtExitUnit, 64>;
+#elif defined(LIBC_COPT_PUBLIC_PACKAGING)
+using ExitCallbackList = ReverseOrderBlockStore<AtExitUnit, 32>;
+#else
+using ExitCallbackList = FixedVector<AtExitUnit, CALLBACK_LIST_SIZE_FOR_TESTS>;
+#endif
+
+extern ExitCallbackList atexit_callbacks;
+extern ExitCallbackList at_quick_exit_callbacks;
+
+extern Mutex handler_list_mtx;
+
+void stdc_at_exit_func(void *payload);
+
+void call_exit_callbacks(ExitCallbackList &callbacks);
+
+int add_atexit_unit(ExitCallbackList &callbacks, const AtExitUnit &unit);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STDLIB_EXIT_HANDLER_H
diff --git a/libc/src/stdlib/quick_exit.cpp b/libc/src/stdlib/quick_exit.cpp
index cf7f07b..38f0a3d 100644
--- a/libc/src/stdlib/quick_exit.cpp
+++ b/libc/src/stdlib/quick_exit.cpp
@@ -9,13 +9,15 @@
#include "src/stdlib/quick_exit.h"
#include "src/__support/OSUtil/exit.h"
#include "src/__support/common.h"
+#include "src/stdlib/exit_handler.h"
// extern "C" void __cxa_finalize(void *);
-
namespace LIBC_NAMESPACE {
+extern ExitCallbackList at_quick_exit_callbacks;
+
[[noreturn]] LLVM_LIBC_FUNCTION(void, quick_exit, (int status)) {
- // __cxa_finalize(nullptr);
+ call_exit_callbacks(at_quick_exit_callbacks);
internal::exit(status);
}
diff --git a/libc/src/sys/epoll/linux/CMakeLists.txt b/libc/src/sys/epoll/linux/CMakeLists.txt
index 4e661b2..5ba89bd 100644
--- a/libc/src/sys/epoll/linux/CMakeLists.txt
+++ b/libc/src/sys/epoll/linux/CMakeLists.txt
@@ -48,6 +48,7 @@ add_entrypoint_object(
libc.hdr.types.struct_timespec
libc.include.sys_syscall
libc.src.__support.OSUtil.osutil
+ libc.src.__support.macros.sanitizer
libc.src.errno.errno
)
@@ -65,6 +66,7 @@ add_entrypoint_object(
libc.hdr.signal_macros
libc.include.sys_syscall
libc.src.__support.OSUtil.osutil
+ libc.src.__support.macros.sanitizer
libc.src.errno.errno
)
@@ -82,5 +84,6 @@ add_entrypoint_object(
libc.hdr.signal_macros
libc.include.sys_syscall
libc.src.__support.OSUtil.osutil
+ libc.src.__support.macros.sanitizer
libc.src.errno.errno
)
diff --git a/libc/src/sys/epoll/linux/epoll_pwait.cpp b/libc/src/sys/epoll/linux/epoll_pwait.cpp
index 8f498d1..24b66f0 100644
--- a/libc/src/sys/epoll/linux/epoll_pwait.cpp
+++ b/libc/src/sys/epoll/linux/epoll_pwait.cpp
@@ -13,6 +13,7 @@
#include "hdr/types/struct_epoll_event.h"
#include "src/__support/OSUtil/syscall.h" // For internal syscall function.
#include "src/__support/common.h"
+#include "src/__support/macros/sanitizer.h"
#include "src/errno/libc_errno.h"
#include <sys/syscall.h> // For syscall numbers.
@@ -33,6 +34,8 @@ LLVM_LIBC_FUNCTION(int, epoll_pwait,
return -1;
}
+ MSAN_UNPOISON(events, ret * sizeof(struct epoll_event));
+
return ret;
}
diff --git a/libc/src/sys/epoll/linux/epoll_pwait2.cpp b/libc/src/sys/epoll/linux/epoll_pwait2.cpp
index bd33cb6..e13423a 100644
--- a/libc/src/sys/epoll/linux/epoll_pwait2.cpp
+++ b/libc/src/sys/epoll/linux/epoll_pwait2.cpp
@@ -14,6 +14,7 @@
#include "hdr/types/struct_timespec.h"
#include "src/__support/OSUtil/syscall.h" // For internal syscall function.
#include "src/__support/common.h"
+#include "src/__support/macros/sanitizer.h"
#include "src/errno/libc_errno.h"
#include <sys/syscall.h> // For syscall numbers.
@@ -35,6 +36,8 @@ LLVM_LIBC_FUNCTION(int, epoll_pwait2,
return -1;
}
+ MSAN_UNPOISON(events, ret * sizeof(struct epoll_event));
+
return ret;
}
diff --git a/libc/src/sys/epoll/linux/epoll_wait.cpp b/libc/src/sys/epoll/linux/epoll_wait.cpp
index 95238d8..3ce4a92 100644
--- a/libc/src/sys/epoll/linux/epoll_wait.cpp
+++ b/libc/src/sys/epoll/linux/epoll_wait.cpp
@@ -13,6 +13,7 @@
#include "hdr/types/struct_epoll_event.h"
#include "src/__support/OSUtil/syscall.h" // For internal syscall function.
#include "src/__support/common.h"
+#include "src/__support/macros/sanitizer.h"
#include "src/errno/libc_errno.h"
#include <sys/syscall.h> // For syscall numbers.
@@ -39,6 +40,8 @@ LLVM_LIBC_FUNCTION(int, epoll_wait,
return -1;
}
+ MSAN_UNPOISON(events, ret * sizeof(struct epoll_event));
+
return ret;
}
diff --git a/libc/test/CMakeLists.txt b/libc/test/CMakeLists.txt
index 5e26a10..b5c989a 100644
--- a/libc/test/CMakeLists.txt
+++ b/libc/test/CMakeLists.txt
@@ -8,10 +8,14 @@ add_custom_target(libc-long-running-tests)
add_subdirectory(UnitTest)
-if(LIBC_TARGET_OS_IS_GPU AND
- (NOT TARGET libc.utils.gpu.loader OR LIBC_GPU_TESTS_DISABLED))
- message(WARNING "Cannot build libc GPU tests, missing loader or architecture")
- return()
+if(LIBC_TARGET_OS_IS_GPU)
+ if(NOT TARGET libc.utils.gpu.loader)
+ message(WARNING "Cannot build libc GPU tests, missing loader.")
+ return()
+ elseif(LIBC_GPU_TESTS_DISABLED)
+ message(WARNING "Cannot build libc GPU tests, missing target architecture.")
+ return()
+ endif()
endif()
add_subdirectory(include)
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 663aa2b..d05377e 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -86,8 +86,8 @@ add_libc_test(
libc.src.__support.uint128
)
-# The GPU does not support varargs currently.
-if(NOT LIBC_TARGET_OS_IS_GPU)
+# NVPTX does not support varargs currently.
+if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_libc_test(
arg_list_test
SUITE
@@ -132,6 +132,7 @@ add_libc_test(
SRCS
fixedvector_test.cpp
DEPENDS
+ libc.src.__support.CPP.array
libc.src.__support.fixedvector
)
diff --git a/libc/test/src/__support/CPP/type_traits_test.cpp b/libc/test/src/__support/CPP/type_traits_test.cpp
index a2051f3..1c428e9 100644
--- a/libc/test/src/__support/CPP/type_traits_test.cpp
+++ b/libc/test/src/__support/CPP/type_traits_test.cpp
@@ -112,6 +112,15 @@ TEST(LlvmLibcTypeTraitsTest, add_rvalue_reference_void) {
const volatile void>));
}
+TEST(LlvmLibcTypeTraitsTest, aligned_storage) {
+ struct S {
+ int a, b;
+ };
+ aligned_storage_t<sizeof(S), alignof(S)> buf;
+ EXPECT_EQ(alignof(buf), alignof(S));
+ EXPECT_EQ(sizeof(buf), sizeof(S));
+}
+
TEST(LlvmLibcTypeTraitsTest, bool_constant) {
EXPECT_TRUE((bool_constant<true>::value));
EXPECT_FALSE((bool_constant<false>::value));
diff --git a/libc/test/src/__support/fixedvector_test.cpp b/libc/test/src/__support/fixedvector_test.cpp
index e9ffdd0..212e1ae 100644
--- a/libc/test/src/__support/fixedvector_test.cpp
+++ b/libc/test/src/__support/fixedvector_test.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/array.h"
#include "src/__support/fixedvector.h"
#include "test/UnitTest/Test.h"
@@ -69,3 +70,29 @@ TEST(LlvmLibcFixedVectorTest, Iteration) {
for (int &x : v)
ASSERT_GE(x, 0);
}
+
+TEST(LlvmLibcFixedVectorTest, ConstructionFromIterators) {
+ LIBC_NAMESPACE::cpp::array<int, 4> arr{1, 2, 3, 4};
+ LIBC_NAMESPACE::FixedVector<int, 5> vec(arr.begin(), arr.end());
+ ASSERT_EQ(vec.size(), arr.size());
+ for (size_t i = 0; i < arr.size(); ++i)
+ ASSERT_EQ(vec[i], arr[i]);
+}
+
+TEST(LlvmLibcFixedVectorTest, ConstructionFromCountAndValue) {
+ constexpr int kVal = 10;
+ LIBC_NAMESPACE::FixedVector<int, 5> vec(4, kVal);
+ ASSERT_EQ(vec.size(), size_t(4));
+ for (size_t i = 0; i < vec.size(); ++i)
+ ASSERT_EQ(vec[i], kVal);
+}
+
+TEST(LlvmLibcFixedVectorTest, ForwardIteration) {
+ LIBC_NAMESPACE::cpp::array<int, 4> arr{1, 2, 3, 4};
+ LIBC_NAMESPACE::FixedVector<int, 5> vec(arr.begin(), arr.end());
+ ASSERT_EQ(vec.size(), arr.size());
+ for (auto it = vec.begin(); it != vec.end(); ++it) {
+ auto idx = it - vec.begin();
+ ASSERT_EQ(*it, arr[idx]);
+ }
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 09e54349..110fa1d 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -1705,6 +1705,7 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fminf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1718,6 +1719,7 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fmin
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1731,6 +1733,7 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fminl
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1744,6 +1747,21 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fminf128
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminf16_test.cpp
+ HDRS
+ FMinTest.h
+ DEPENDS
+ libc.src.math.fminf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1757,6 +1775,7 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmaxf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1770,6 +1789,7 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmax
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1783,6 +1803,7 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmaxl
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1796,6 +1817,21 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmaxf128
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaxf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaxf16_test.cpp
+ HDRS
+ FMaxTest.h
+ DEPENDS
+ libc.src.math.fmaxf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1809,6 +1845,21 @@ add_fp_unittest(
FMaximumTest.h
DEPENDS
libc.src.math.fmaximuml
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaximumf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaximumf16_test.cpp
+ HDRS
+ FMaximumTest.h
+ DEPENDS
+ libc.src.math.fmaximumf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1822,6 +1873,7 @@ add_fp_unittest(
FMaximumTest.h
DEPENDS
libc.src.math.fmaximumf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1835,6 +1887,7 @@ add_fp_unittest(
FMaximumTest.h
DEPENDS
libc.src.math.fmaximum
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1848,6 +1901,7 @@ add_fp_unittest(
FMaximumTest.h
DEPENDS
libc.src.math.fmaximumf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1861,6 +1915,7 @@ add_fp_unittest(
FMaximumNumTest.h
DEPENDS
libc.src.math.fmaximum_numf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1874,6 +1929,7 @@ add_fp_unittest(
FMaximumNumTest.h
DEPENDS
libc.src.math.fmaximum_num
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1887,6 +1943,21 @@ add_fp_unittest(
FMaximumNumTest.h
DEPENDS
libc.src.math.fmaximum_numl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaximum_numf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaximum_numf16_test.cpp
+ HDRS
+ FMaximumNumTest.h
+ DEPENDS
+ libc.src.math.fmaximum_numf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1900,6 +1971,7 @@ add_fp_unittest(
FMaximumNumTest.h
DEPENDS
libc.src.math.fmaximum_numf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1913,6 +1985,8 @@ add_fp_unittest(
FMaximumMagTest.h
DEPENDS
libc.src.math.fmaximum_magf
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -1926,6 +2000,8 @@ add_fp_unittest(
FMaximumMagTest.h
DEPENDS
libc.src.math.fmaximum_mag
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -1939,6 +2015,23 @@ add_fp_unittest(
FMaximumMagTest.h
DEPENDS
libc.src.math.fmaximum_magl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaximum_magf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaximum_magf16_test.cpp
+ HDRS
+ FMaximumMagTest.h
+ DEPENDS
+ libc.src.math.fmaximum_magf16
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -1952,10 +2045,11 @@ add_fp_unittest(
FMaximumMagTest.h
DEPENDS
libc.src.math.fmaximum_magf128
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
-
add_fp_unittest(
fmaximum_mag_numf_test
SUITE
@@ -1966,6 +2060,7 @@ add_fp_unittest(
FMaximumMagNumTest.h
DEPENDS
libc.src.math.fmaximum_mag_numf
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -1979,6 +2074,7 @@ add_fp_unittest(
FMaximumMagNumTest.h
DEPENDS
libc.src.math.fmaximum_mag_num
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -1992,6 +2088,21 @@ add_fp_unittest(
FMaximumMagNumTest.h
DEPENDS
libc.src.math.fmaximum_mag_numl
+ libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaximum_mag_numf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaximum_mag_numf16_test.cpp
+ HDRS
+ FMaximumMagNumTest.h
+ DEPENDS
+ libc.src.math.fmaximum_mag_numf16
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -2005,6 +2116,7 @@ add_fp_unittest(
FMaximumMagNumTest.h
DEPENDS
libc.src.math.fmaximum_mag_numf128
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -2018,6 +2130,21 @@ add_fp_unittest(
FMinimumTest.h
DEPENDS
libc.src.math.fminimuml
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminimumf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminimumf16_test.cpp
+ HDRS
+ FMinimumTest.h
+ DEPENDS
+ libc.src.math.fminimumf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2031,6 +2158,7 @@ add_fp_unittest(
FMinimumTest.h
DEPENDS
libc.src.math.fminimumf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2044,6 +2172,7 @@ add_fp_unittest(
FMinimumTest.h
DEPENDS
libc.src.math.fminimum
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2057,6 +2186,7 @@ add_fp_unittest(
FMinimumTest.h
DEPENDS
libc.src.math.fminimumf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2070,6 +2200,7 @@ add_fp_unittest(
FMinimumNumTest.h
DEPENDS
libc.src.math.fminimum_numf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2083,6 +2214,7 @@ add_fp_unittest(
FMinimumNumTest.h
DEPENDS
libc.src.math.fminimum_num
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2096,6 +2228,21 @@ add_fp_unittest(
FMinimumNumTest.h
DEPENDS
libc.src.math.fminimum_numl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminimum_numf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminimum_numf16_test.cpp
+ HDRS
+ FMinimumNumTest.h
+ DEPENDS
+ libc.src.math.fminimum_numf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2109,6 +2256,7 @@ add_fp_unittest(
FMinimumNumTest.h
DEPENDS
libc.src.math.fminimum_numf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2122,6 +2270,7 @@ add_fp_unittest(
FMinimumMagTest.h
DEPENDS
libc.src.math.fminimum_magf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2135,6 +2284,7 @@ add_fp_unittest(
FMinimumMagTest.h
DEPENDS
libc.src.math.fminimum_mag
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2148,6 +2298,21 @@ add_fp_unittest(
FMinimumMagTest.h
DEPENDS
libc.src.math.fminimum_magl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminimum_magf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminimum_magf16_test.cpp
+ HDRS
+ FMinimumMagTest.h
+ DEPENDS
+ libc.src.math.fminimum_magf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2161,10 +2326,10 @@ add_fp_unittest(
FMinimumMagTest.h
DEPENDS
libc.src.math.fminimum_magf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
-
add_fp_unittest(
fminimum_mag_numf_test
SUITE
@@ -2175,6 +2340,7 @@ add_fp_unittest(
FMinimumMagNumTest.h
DEPENDS
libc.src.math.fminimum_mag_numf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2188,6 +2354,7 @@ add_fp_unittest(
FMinimumMagNumTest.h
DEPENDS
libc.src.math.fminimum_mag_num
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2201,6 +2368,21 @@ add_fp_unittest(
FMinimumMagNumTest.h
DEPENDS
libc.src.math.fminimum_mag_numl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminimum_mag_numf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminimum_mag_numf16_test.cpp
+ HDRS
+ FMinimumMagNumTest.h
+ DEPENDS
+ libc.src.math.fminimum_mag_numf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2214,6 +2396,7 @@ add_fp_unittest(
FMinimumMagNumTest.h
DEPENDS
libc.src.math.fminimum_mag_numf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2515,8 +2698,10 @@ add_fp_unittest(
HDRS
NextAfterTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nextafter
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2529,8 +2714,10 @@ add_fp_unittest(
HDRS
NextAfterTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nextafterf
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2543,8 +2730,26 @@ add_fp_unittest(
HDRS
NextAfterTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nextafterl
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ nextafterf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ nextafterf16_test.cpp
+ HDRS
+ NextAfterTest.h
+ DEPENDS
+ libc.hdr.fenv_macros
+ libc.src.math.nextafterf16
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2557,8 +2762,10 @@ add_fp_unittest(
HDRS
NextAfterTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nextafterf128
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2573,8 +2780,10 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
HDRS
NextTowardTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nexttoward
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2587,8 +2796,10 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
HDRS
NextTowardTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nexttowardf
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
endif()
@@ -2602,8 +2813,26 @@ add_fp_unittest(
HDRS
NextTowardTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nexttowardl
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ nexttowardf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ nexttowardf16_test.cpp
+ HDRS
+ NextTowardTest.h
+ DEPENDS
+ libc.hdr.fenv_macros
+ libc.src.math.nexttowardf16
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2617,7 +2846,6 @@ add_fp_unittest(
NextDownTest.h
DEPENDS
libc.src.math.nextdown
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2630,7 +2858,6 @@ add_fp_unittest(
NextDownTest.h
DEPENDS
libc.src.math.nextdownf
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2643,7 +2870,18 @@ add_fp_unittest(
NextDownTest.h
DEPENDS
libc.src.math.nextdownl
- libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+ nextdownf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ nextdownf16_test.cpp
+ HDRS
+ NextDownTest.h
+ DEPENDS
+ libc.src.math.nextdownf16
)
add_fp_unittest(
@@ -2656,7 +2894,6 @@ add_fp_unittest(
NextDownTest.h
DEPENDS
libc.src.math.nextdownf128
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2669,7 +2906,6 @@ add_fp_unittest(
NextUpTest.h
DEPENDS
libc.src.math.nextup
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2682,7 +2918,6 @@ add_fp_unittest(
NextUpTest.h
DEPENDS
libc.src.math.nextupf
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2695,7 +2930,18 @@ add_fp_unittest(
NextUpTest.h
DEPENDS
libc.src.math.nextupl
- libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+ nextupf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ nextupf16_test.cpp
+ HDRS
+ NextUpTest.h
+ DEPENDS
+ libc.src.math.nextupf16
)
add_fp_unittest(
@@ -2708,7 +2954,6 @@ add_fp_unittest(
NextUpTest.h
DEPENDS
libc.src.math.nextupf128
- libc.src.__support.FPUtil.manipulation_functions
)
# TODO(lntue): The current implementation of fputil::general::fma<float> is only
diff --git a/libc/test/src/math/smoke/FMaxTest.h b/libc/test/src/math/smoke/FMaxTest.h
index df8e35e..f4c78b5 100644
--- a/libc/test/src/math/smoke/FMaxTest.h
+++ b/libc/test/src/math/smoke/FMaxTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -55,10 +56,11 @@ public:
}
void testRange(FMaxFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
diff --git a/libc/test/src/math/smoke/FMaximumMagNumTest.h b/libc/test/src/math/smoke/FMaximumMagNumTest.h
index aafb6d2..726f870 100644
--- a/libc/test/src/math/smoke/FMaximumMagNumTest.h
+++ b/libc/test/src/math/smoke/FMaximumMagNumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMMAG_NUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMMAG_NUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
@@ -68,10 +69,11 @@ public:
}
void testRange(FMaximumMagNumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -82,11 +84,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y)) {
+ if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y))
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMaximumMagTest.h b/libc/test/src/math/smoke/FMaximumMagTest.h
index 7bb79a6..b5b2c1c 100644
--- a/libc/test/src/math/smoke/FMaximumMagTest.h
+++ b/libc/test/src/math/smoke/FMaximumMagTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUM_MAGTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUM_MAGTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
@@ -56,10 +57,11 @@ public:
}
void testRange(FMaximumMagFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -70,11 +72,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y)) {
+ if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y))
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMaximumNumTest.h b/libc/test/src/math/smoke/FMaximumNumTest.h
index da0ea2c..ec79135 100644
--- a/libc/test/src/math/smoke/FMaximumNumTest.h
+++ b/libc/test/src/math/smoke/FMaximumNumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMNUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMNUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
@@ -67,10 +68,11 @@ public:
}
void testRange(FMaximumNumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -81,11 +83,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (x > y) {
+ if (x > y)
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMaximumTest.h b/libc/test/src/math/smoke/FMaximumTest.h
index 1bd1516..94e4a34 100644
--- a/libc/test/src/math/smoke/FMaximumTest.h
+++ b/libc/test/src/math/smoke/FMaximumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -55,10 +56,11 @@ public:
}
void testRange(FMaximumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -69,11 +71,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (x > y) {
+ if (x > y)
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMinTest.h b/libc/test/src/math/smoke/FMinTest.h
index f71b558..629aaab 100644
--- a/libc/test/src/math/smoke/FMinTest.h
+++ b/libc/test/src/math/smoke/FMinTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -55,10 +56,11 @@ public:
}
void testRange(FMinFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
diff --git a/libc/test/src/math/smoke/FMinimumMagNumTest.h b/libc/test/src/math/smoke/FMinimumMagNumTest.h
index e4b8fd9..2ceca6f 100644
--- a/libc/test/src/math/smoke/FMinimumMagNumTest.h
+++ b/libc/test/src/math/smoke/FMinimumMagNumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMMAG_NUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMMAG_NUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
@@ -68,10 +69,11 @@ public:
}
void testRange(FMinimumMagNumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -82,11 +84,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y)) {
+ if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y))
EXPECT_FP_EQ(y, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(x, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMinimumMagTest.h b/libc/test/src/math/smoke/FMinimumMagTest.h
index 3e16622..9c49446 100644
--- a/libc/test/src/math/smoke/FMinimumMagTest.h
+++ b/libc/test/src/math/smoke/FMinimumMagTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUM_MAGTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUM_MAGTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
@@ -56,10 +57,11 @@ public:
}
void testRange(FMinimumMagFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -70,11 +72,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (LIBC_NAMESPACE::fputil::abs(x) < LIBC_NAMESPACE::fputil::abs(y)) {
+ if (LIBC_NAMESPACE::fputil::abs(x) < LIBC_NAMESPACE::fputil::abs(y))
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMinimumNumTest.h b/libc/test/src/math/smoke/FMinimumNumTest.h
index 6186ea0..8004ee9 100644
--- a/libc/test/src/math/smoke/FMinimumNumTest.h
+++ b/libc/test/src/math/smoke/FMinimumNumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMNUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMNUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
@@ -67,10 +68,11 @@ public:
}
void testRange(FMinimumNumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -81,11 +83,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (x > y) {
+ if (x > y)
EXPECT_FP_EQ(y, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(x, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMinimumTest.h b/libc/test/src/math/smoke/FMinimumTest.h
index a267f6c..242c857 100644
--- a/libc/test/src/math/smoke/FMinimumTest.h
+++ b/libc/test/src/math/smoke/FMinimumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -55,10 +56,11 @@ public:
}
void testRange(FMinimumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -69,11 +71,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (x > y) {
+ if (x > y)
EXPECT_FP_EQ(y, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(x, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/NextAfterTest.h b/libc/test/src/math/smoke/NextAfterTest.h
index d65ccdf..6278f89 100644
--- a/libc/test/src/math/smoke/NextAfterTest.h
+++ b/libc/test/src/math/smoke/NextAfterTest.h
@@ -9,15 +9,15 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTAFTERTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_NEXTAFTERTEST_H
-#include "hdr/math_macros.h"
#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/type_traits.h"
-#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
+#include "hdr/fenv_macros.h"
+
// TODO: Strengthen errno,exception checks and remove these assert macros
// after new matchers/test fixtures are added
#define ASSERT_FP_EQ_WITH_EXCEPTION(result, expected, expected_exception) \
@@ -181,7 +181,7 @@ public:
result_bits = FPBits(result);
ASSERT_EQ(result_bits.get_biased_exponent(), x_bits.get_biased_exponent());
ASSERT_EQ(result_bits.get_mantissa(),
- x_bits.get_mantissa() + StorageType(1));
+ static_cast<StorageType>(x_bits.get_mantissa() + StorageType(1)));
x = -x;
@@ -195,7 +195,7 @@ public:
result_bits = FPBits(result);
ASSERT_EQ(result_bits.get_biased_exponent(), x_bits.get_biased_exponent());
ASSERT_EQ(result_bits.get_mantissa(),
- x_bits.get_mantissa() + StorageType(1));
+ static_cast<StorageType>(x_bits.get_mantissa() + StorageType(1)));
}
};
diff --git a/libc/test/src/math/smoke/NextTowardTest.h b/libc/test/src/math/smoke/NextTowardTest.h
index a24ec9f..5992273 100644
--- a/libc/test/src/math/smoke/NextTowardTest.h
+++ b/libc/test/src/math/smoke/NextTowardTest.h
@@ -9,16 +9,15 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTTOWARDTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_NEXTTOWARDTEST_H
-#include "hdr/fenv_macros.h"
-#include "hdr/math_macros.h"
#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/type_traits.h"
-#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
+#include "hdr/fenv_macros.h"
+
// TODO: Strengthen errno,exception checks and remove these assert macros
// after new matchers/test fixtures are added
#define ASSERT_FP_EQ_WITH_EXCEPTION(result, expected, expected_exception) \
@@ -194,7 +193,7 @@ public:
result_bits = FPBits(result);
ASSERT_EQ(result_bits.get_biased_exponent(), x_bits.get_biased_exponent());
ASSERT_EQ(result_bits.get_mantissa(),
- x_bits.get_mantissa() + StorageType(1));
+ static_cast<StorageType>(x_bits.get_mantissa() + StorageType(1)));
x = -x;
@@ -208,7 +207,7 @@ public:
result_bits = FPBits(result);
ASSERT_EQ(result_bits.get_biased_exponent(), x_bits.get_biased_exponent());
ASSERT_EQ(result_bits.get_mantissa(),
- x_bits.get_mantissa() + StorageType(1));
+ static_cast<StorageType>(x_bits.get_mantissa() + StorageType(1)));
}
};
diff --git a/libc/test/src/math/smoke/fmaxf16_test.cpp b/libc/test/src/math/smoke/fmaxf16_test.cpp
new file mode 100644
index 0000000..79c03b7
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaxf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaxf16 --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaxTest.h"
+
+#include "src/math/fmaxf16.h"
+
+LIST_FMAX_TESTS(float16, LIBC_NAMESPACE::fmaxf16)
diff --git a/libc/test/src/math/smoke/fmaximum_mag_numf16_test.cpp b/libc/test/src/math/smoke/fmaximum_mag_numf16_test.cpp
new file mode 100644
index 0000000..b11653e
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaximum_mag_numf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaximum_mag_numf16 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaximumMagNumTest.h"
+
+#include "src/math/fmaximum_mag_numf16.h"
+
+LIST_FMAXIMUM_MAG_NUM_TESTS(float16, LIBC_NAMESPACE::fmaximum_mag_numf16)
diff --git a/libc/test/src/math/smoke/fmaximum_magf16_test.cpp b/libc/test/src/math/smoke/fmaximum_magf16_test.cpp
new file mode 100644
index 0000000..6df1e4a
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaximum_magf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaximum_magf16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaximumMagTest.h"
+
+#include "src/math/fmaximum_magf16.h"
+
+LIST_FMAXIMUM_MAG_TESTS(float16, LIBC_NAMESPACE::fmaximum_magf16)
diff --git a/libc/test/src/math/smoke/fmaximum_numf16_test.cpp b/libc/test/src/math/smoke/fmaximum_numf16_test.cpp
new file mode 100644
index 0000000..7cb9cb0
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaximum_numf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaximum_numf16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaximumNumTest.h"
+
+#include "src/math/fmaximum_numf16.h"
+
+LIST_FMAXIMUM_NUM_TESTS(float16, LIBC_NAMESPACE::fmaximum_numf16)
diff --git a/libc/test/src/math/smoke/fmaximumf16_test.cpp b/libc/test/src/math/smoke/fmaximumf16_test.cpp
new file mode 100644
index 0000000..4cbf846
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaximumf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaximumf16 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaximumTest.h"
+
+#include "src/math/fmaximumf16.h"
+
+LIST_FMAXIMUM_TESTS(float16, LIBC_NAMESPACE::fmaximumf16)
diff --git a/libc/test/src/math/smoke/fminf16_test.cpp b/libc/test/src/math/smoke/fminf16_test.cpp
new file mode 100644
index 0000000..4379a6e
--- /dev/null
+++ b/libc/test/src/math/smoke/fminf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminf16 ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinTest.h"
+
+#include "src/math/fminf16.h"
+
+LIST_FMIN_TESTS(float16, LIBC_NAMESPACE::fminf16)
diff --git a/libc/test/src/math/smoke/fminimum_mag_numf16_test.cpp b/libc/test/src/math/smoke/fminimum_mag_numf16_test.cpp
new file mode 100644
index 0000000..2c6aede
--- /dev/null
+++ b/libc/test/src/math/smoke/fminimum_mag_numf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminimum_mag_numf16 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinimumMagNumTest.h"
+
+#include "src/math/fminimum_mag_numf16.h"
+
+LIST_FMINIMUM_MAG_NUM_TESTS(float16, LIBC_NAMESPACE::fminimum_mag_numf16)
diff --git a/libc/test/src/math/smoke/fminimum_magf16_test.cpp b/libc/test/src/math/smoke/fminimum_magf16_test.cpp
new file mode 100644
index 0000000..3687aec
--- /dev/null
+++ b/libc/test/src/math/smoke/fminimum_magf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminimum_magf16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinimumMagTest.h"
+
+#include "src/math/fminimum_magf16.h"
+
+LIST_FMINIMUM_MAG_TESTS(float16, LIBC_NAMESPACE::fminimum_magf16)
diff --git a/libc/test/src/math/smoke/fminimum_numf16_test.cpp b/libc/test/src/math/smoke/fminimum_numf16_test.cpp
new file mode 100644
index 0000000..6775081
--- /dev/null
+++ b/libc/test/src/math/smoke/fminimum_numf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminimum_numf16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinimumNumTest.h"
+
+#include "src/math/fminimum_numf16.h"
+
+LIST_FMINIMUM_NUM_TESTS(float16, LIBC_NAMESPACE::fminimum_numf16)
diff --git a/libc/test/src/math/smoke/fminimumf16_test.cpp b/libc/test/src/math/smoke/fminimumf16_test.cpp
new file mode 100644
index 0000000..f8b0577
--- /dev/null
+++ b/libc/test/src/math/smoke/fminimumf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminimumf16 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinimumTest.h"
+
+#include "src/math/fminimumf16.h"
+
+LIST_FMINIMUM_TESTS(float16, LIBC_NAMESPACE::fminimumf16)
diff --git a/libc/test/src/math/smoke/nextafterf16_test.cpp b/libc/test/src/math/smoke/nextafterf16_test.cpp
new file mode 100644
index 0000000..860a0c7
--- /dev/null
+++ b/libc/test/src/math/smoke/nextafterf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextafterf16 ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextAfterTest.h"
+
+#include "src/math/nextafterf16.h"
+
+LIST_NEXTAFTER_TESTS(float16, LIBC_NAMESPACE::nextafterf16)
diff --git a/libc/test/src/math/smoke/nextdownf16_test.cpp b/libc/test/src/math/smoke/nextdownf16_test.cpp
new file mode 100644
index 0000000..353f085
--- /dev/null
+++ b/libc/test/src/math/smoke/nextdownf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextdownf16 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextDownTest.h"
+
+#include "src/math/nextdownf16.h"
+
+LIST_NEXTDOWN_TESTS(float16, LIBC_NAMESPACE::nextdownf16)
diff --git a/libc/test/src/math/smoke/nexttowardf16_test.cpp b/libc/test/src/math/smoke/nexttowardf16_test.cpp
new file mode 100644
index 0000000..8490e8d
--- /dev/null
+++ b/libc/test/src/math/smoke/nexttowardf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nexttowardf16 ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextTowardTest.h"
+
+#include "src/math/nexttowardf16.h"
+
+LIST_NEXTTOWARD_TESTS(float16, LIBC_NAMESPACE::nexttowardf16)
diff --git a/libc/test/src/math/smoke/nextupf16_test.cpp b/libc/test/src/math/smoke/nextupf16_test.cpp
new file mode 100644
index 0000000..a146d27
--- /dev/null
+++ b/libc/test/src/math/smoke/nextupf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextupf16 -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextUpTest.h"
+
+#include "src/math/nextupf16.h"
+
+LIST_NEXTUP_TESTS(float16, LIBC_NAMESPACE::nextupf16)
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 6a7faed..3848877 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -354,7 +354,20 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.stdlib.exit
libc.src.stdlib.atexit
libc.src.__support.CPP.array
- libc.src.__support.CPP.utility
+ )
+
+ add_libc_test(
+ at_quick_exit_test
+ # The EXPECT_EXITS test is only availible for unit tests.
+ UNIT_TEST_ONLY
+ SUITE
+ libc-stdlib-tests
+ SRCS
+ at_quick_exit_test.cpp
+ DEPENDS
+ libc.src.stdlib.quick_exit
+ libc.src.stdlib.at_quick_exit
+ libc.src.__support.CPP.array
)
add_libc_test(
diff --git a/libc/test/src/stdlib/at_quick_exit_test.cpp b/libc/test/src/stdlib/at_quick_exit_test.cpp
new file mode 100644
index 0000000..e0a258d
--- /dev/null
+++ b/libc/test/src/stdlib/at_quick_exit_test.cpp
@@ -0,0 +1,90 @@
+//===-- Unittests for at_quick_exit ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/array.h"
+#include "src/__support/CPP/utility.h"
+#include "src/stdlib/at_quick_exit.h"
+#include "src/stdlib/quick_exit.h"
+#include "test/UnitTest/Test.h"
+
+static int a;
+TEST(LlvmLibcAtQuickExit, Basic) {
+ // In case tests ever run multiple times.
+ a = 0;
+
+ auto test = [] {
+ int status = LIBC_NAMESPACE::at_quick_exit(+[] {
+ if (a != 1)
+ __builtin_trap();
+ });
+ status |= LIBC_NAMESPACE::at_quick_exit(+[] { a++; });
+ if (status)
+ __builtin_trap();
+
+ LIBC_NAMESPACE::quick_exit(0);
+ };
+ EXPECT_EXITS(test, 0);
+}
+
+TEST(LlvmLibcAtQuickExit, AtQuickExitCallsSysExit) {
+ auto test = [] {
+ LIBC_NAMESPACE::at_quick_exit(+[] { _Exit(1); });
+ LIBC_NAMESPACE::quick_exit(0);
+ };
+ EXPECT_EXITS(test, 1);
+}
+
+static int size;
+static LIBC_NAMESPACE::cpp::array<int, 256> arr;
+
+template <int... Ts>
+void register_at_quick_exit_handlers(
+ LIBC_NAMESPACE::cpp::integer_sequence<int, Ts...>) {
+ (LIBC_NAMESPACE::at_quick_exit(+[] { arr[size++] = Ts; }), ...);
+}
+
+template <int count> constexpr auto get_test() {
+ return [] {
+ LIBC_NAMESPACE::at_quick_exit(+[] {
+ if (size != count)
+ __builtin_trap();
+ for (int i = 0; i < count; i++)
+ if (arr[i] != count - 1 - i)
+ __builtin_trap();
+ });
+ register_at_quick_exit_handlers(
+ LIBC_NAMESPACE::cpp::make_integer_sequence<int, count>{});
+ LIBC_NAMESPACE::quick_exit(0);
+ };
+}
+
+TEST(LlvmLibcAtQuickExit, ReverseOrder) {
+ // In case tests ever run multiple times.
+ size = 0;
+
+ auto test = get_test<32>();
+ EXPECT_EXITS(test, 0);
+}
+
+TEST(LlvmLibcAtQuickExit, Many) {
+ // In case tests ever run multiple times.
+ size = 0;
+
+ auto test = get_test<256>();
+ EXPECT_EXITS(test, 0);
+}
+
+TEST(LlvmLibcAtQuickExit, HandlerCallsAtQuickExit) {
+ auto test = [] {
+ LIBC_NAMESPACE::at_quick_exit(+[] {
+ LIBC_NAMESPACE::at_quick_exit(+[] { LIBC_NAMESPACE::quick_exit(1); });
+ });
+ LIBC_NAMESPACE::quick_exit(0);
+ };
+ EXPECT_EXITS(test, 1);
+}
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 5ce1795..9858ae9 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -198,7 +198,7 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
gfx1010 gfx1011 gfx1012 gfx1013
gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036
gfx1100 gfx1101 gfx1102 gfx1103
- gfx1150 gfx1151
+ gfx1150 gfx1151 gfx1152
gfx1200 gfx1201
)
diff --git a/libcxx/include/__type_traits/datasizeof.h b/libcxx/include/__type_traits/datasizeof.h
index 54fde24..35c1292 100644
--- a/libcxx/include/__type_traits/datasizeof.h
+++ b/libcxx/include/__type_traits/datasizeof.h
@@ -26,7 +26,7 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-#if __has_extension(datasizeof)
+#if __has_keyword(__datasizeof) || __has_extension(datasizeof)
template <class _Tp>
inline const size_t __datasizeof_v = __datasizeof(_Tp);
#else
diff --git a/libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp b/libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp
index 8fcc811..7d91ca0 100644
--- a/libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp
@@ -8,9 +8,9 @@
// UNSUPPORTED: c++03, c++11, c++14
-// FIXME: Fatal error with following targets (remove XFAIL when fixed):
+// Older versions of clang may encounter a backend error (see 0295c2ad):
// Pass-by-value arguments with alignment greater than register width are not supported.
-// XFAIL: target=powerpc{{.*}}-ibm-aix7.2.5.7
+// XFAIL: target=powerpc{{.*}}-ibm-{{.*}} && (clang-17 || clang-18)
// <experimental/simd>
//
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index ff6d9be..544db20 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1320,8 +1320,8 @@ template <class ELFT> void Writer<ELFT>::sortOrphanSections() {
i = firstSectionOrDotAssignment;
while (nonScriptI != e) {
- OutputSection *orphan = &cast<OutputDesc>(*nonScriptI)->osec;
auto pos = findOrphanPos(i, nonScriptI);
+ OutputSection *orphan = &cast<OutputDesc>(*nonScriptI)->osec;
// As an optimization, find all sections with the same sort rank
// and insert them with one rotate.
diff --git a/lld/test/ELF/linkerscript/sections-nonalloc.s b/lld/test/ELF/linkerscript/sections-nonalloc.s
index b4fab8c..d66e524 100644
--- a/lld/test/ELF/linkerscript/sections-nonalloc.s
+++ b/lld/test/ELF/linkerscript/sections-nonalloc.s
@@ -34,6 +34,11 @@
# RUN: ld.lld -T %t/b.lds %t.o -o %tb
# RUN: llvm-readelf -S -l %tb | FileCheck %s --check-prefix=CHECK1
+## --section-start causes the orphan other3 to be considered before .data3.
+## The non-alloc other3 does not disable the placement of .data3.
+# RUN: ld.lld -T %t/b.lds %t.o -o %tb --section-start=other3=0
+# RUN: llvm-readelf -S -l %tb | FileCheck %s --check-prefix=CHECK1
+
# CHECK1: [Nr] Name Type Address Off Size ES Flg Lk
# CHECK1-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0
# CHECK1-NEXT: [ 1] .text PROGBITS 00000000000000b0 0000b0 000001 00 AX 0
diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py
index 641b2e6..1c0d717 100755
--- a/lldb/examples/python/crashlog.py
+++ b/lldb/examples/python/crashlog.py
@@ -284,7 +284,9 @@ class CrashLog(symbolication.Symbolicator):
"""Class that represents a binary images in a darwin crash log"""
dsymForUUIDBinary = "/usr/local/bin/dsymForUUID"
- if not os.path.exists(dsymForUUIDBinary):
+ if "LLDB_APPLE_DSYMFORUUID_EXECUTABLE" in os.environ:
+ dsymForUUIDBinary = os.environ["LLDB_APPLE_DSYMFORUUID_EXECUTABLE"]
+ elif not os.path.exists(dsymForUUIDBinary):
try:
dsymForUUIDBinary = (
subprocess.check_output("which dsymForUUID", shell=True)
@@ -545,9 +547,9 @@ class CrashLog(symbolication.Symbolicator):
for image in self.images:
image.resolve = True
elif options.crashed_only:
+ images_to_load = []
for thread in self.threads:
- if thread.did_crash():
- images_to_load = []
+ if thread.did_crash() or thread.app_specific_backtrace:
for ident in thread.idents:
for image in self.find_images_with_identifier(ident):
image.resolve = True
@@ -555,11 +557,15 @@ class CrashLog(symbolication.Symbolicator):
futures = []
with tempfile.TemporaryDirectory() as obj_dir:
- with concurrent.futures.ThreadPoolExecutor() as executor:
- def add_module(image, target, obj_dir):
- return image, image.add_module(target, obj_dir)
+ def add_module(image, target, obj_dir):
+ return image, image.add_module(target, obj_dir)
+ max_worker = None
+ if options.no_parallel_image_loading:
+ max_worker = 1
+
+ with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
for image in images_to_load:
if image not in loaded_images:
if image.uuid == uuid.UUID(int=0):
@@ -858,7 +864,7 @@ class JSONCrashLogParser(CrashLogParser):
thread = self.crashlog.Thread(
len(self.crashlog.threads), True, self.crashlog.process_arch
)
- thread.queue = "Application Specific Backtrace"
+ thread.name = "Application Specific Backtrace"
if self.parse_asi_backtrace(thread, json_app_specific_bts[0]):
self.crashlog.threads.append(thread)
else:
@@ -868,7 +874,7 @@ class JSONCrashLogParser(CrashLogParser):
thread = self.crashlog.Thread(
len(self.crashlog.threads), True, self.crashlog.process_arch
)
- thread.queue = "Last Exception Backtrace"
+ thread.name = "Last Exception Backtrace"
self.parse_frames(thread, json_last_exc_bts)
self.crashlog.threads.append(thread)
@@ -1168,11 +1174,13 @@ class TextCrashLogParser(CrashLogParser):
self.thread = self.crashlog.Thread(
idx, True, self.crashlog.process_arch
)
+ self.thread.name = "Application Specific Backtrace"
elif line.startswith("Last Exception Backtrace:"): # iOS
self.parse_mode = self.CrashLogParseMode.THREAD
self.app_specific_backtrace = True
idx = 1
self.thread = self.crashlog.Thread(idx, True, self.crashlog.process_arch)
+ self.thread.name = "Last Exception Backtrace"
self.crashlog.info_lines.append(line.strip())
def parse_thread(self, line):
@@ -1528,6 +1536,7 @@ def load_crashlog_in_scripted_process(debugger, crashlog_path, options, result):
"file_path": crashlog_path,
"load_all_images": options.load_all_images,
"crashed_only": options.crashed_only,
+ "no_parallel_image_loading": options.no_parallel_image_loading,
}
)
)
@@ -1720,6 +1729,13 @@ def CreateSymbolicateCrashLogOptions(
help="show source for all threads, not just the crashed thread",
default=False,
)
+ arg_parser.add_argument(
+ "--no-parallel-image-loading",
+ dest="no_parallel_image_loading",
+ action="store_true",
+ help=argparse.SUPPRESS,
+ default=False,
+ )
if add_interactive_options:
arg_parser.add_argument(
"-i",
@@ -1798,6 +1814,9 @@ def SymbolicateCrashLogs(debugger, command_args, result, is_command):
)
)
+ if "NO_PARALLEL_IMG_LOADING" in os.environ:
+ options.no_parallel_image_loading = True
+
if options.version:
print(debugger.GetVersionString())
return
diff --git a/lldb/examples/python/crashlog_scripted_process.py b/lldb/examples/python/crashlog_scripted_process.py
index 26c5c37..be0ed49 100644
--- a/lldb/examples/python/crashlog_scripted_process.py
+++ b/lldb/examples/python/crashlog_scripted_process.py
@@ -53,6 +53,7 @@ class CrashLogScriptedProcess(ScriptedProcess):
class CrashLogOptions:
load_all_images = False
crashed_only = True
+ no_parallel_image_loading = False
def __init__(self, exe_ctx: lldb.SBExecutionContext, args: lldb.SBStructuredData):
super().__init__(exe_ctx, args)
@@ -84,6 +85,13 @@ class CrashLogScriptedProcess(ScriptedProcess):
if crashed_only.GetType() == lldb.eStructuredDataTypeBoolean:
self.options.crashed_only = crashed_only.GetBooleanValue()
+ no_parallel_image_loading = args.GetValueForKey("no_parallel_image_loading")
+ if no_parallel_image_loading and no_parallel_image_loading.IsValid():
+ if no_parallel_image_loading.GetType() == lldb.eStructuredDataTypeBoolean:
+ self.options.no_parallel_image_loading = (
+ no_parallel_image_loading.GetBooleanValue()
+ )
+
self.pid = super().get_process_id()
self.crashed_thread_idx = 0
self.exception = None
@@ -165,10 +173,7 @@ class CrashLogScriptedThread(ScriptedThread):
self.backing_thread = crashlog_thread
self.idx = self.backing_thread.index
self.tid = self.backing_thread.id
- if self.backing_thread.app_specific_backtrace:
- self.name = "Application Specific Backtrace"
- else:
- self.name = self.backing_thread.name
+ self.name = self.backing_thread.name
self.queue = self.backing_thread.queue
self.has_crashed = self.originating_process.crashed_thread_idx == self.idx
self.create_stackframes()
diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h
index 1d85308..e85ba46 100644
--- a/lldb/include/lldb/Expression/DWARFExpression.h
+++ b/lldb/include/lldb/Expression/DWARFExpression.h
@@ -132,13 +132,12 @@ public:
/// \return
/// True on success; false otherwise. If error_ptr is non-NULL,
/// details of the failure are provided through it.
- static bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx,
- lldb::ModuleSP module_sp, const DataExtractor &opcodes,
- const plugin::dwarf::DWARFUnit *dwarf_cu,
- const lldb::RegisterKind reg_set,
- const Value *initial_value_ptr,
- const Value *object_address_ptr, Value &result,
- Status *error_ptr);
+ static llvm::Expected<Value>
+ Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx,
+ lldb::ModuleSP module_sp, const DataExtractor &opcodes,
+ const plugin::dwarf::DWARFUnit *dwarf_cu,
+ const lldb::RegisterKind reg_set, const Value *initial_value_ptr,
+ const Value *object_address_ptr);
static bool ParseDWARFLocationList(const plugin::dwarf::DWARFUnit *dwarf_cu,
const DataExtractor &data,
diff --git a/lldb/include/lldb/Expression/DWARFExpressionList.h b/lldb/include/lldb/Expression/DWARFExpressionList.h
index c2218ad..f711a1c 100644
--- a/lldb/include/lldb/Expression/DWARFExpressionList.h
+++ b/lldb/include/lldb/Expression/DWARFExpressionList.h
@@ -9,6 +9,7 @@
#ifndef LLDB_EXPRESSION_DWARFEXPRESSIONLIST_H
#define LLDB_EXPRESSION_DWARFEXPRESSIONLIST_H
+#include "lldb/Core/Value.h"
#include "lldb/Expression/DWARFExpression.h"
#include "lldb/Utility/RangeMap.h"
#include "lldb/lldb-private.h"
@@ -113,10 +114,11 @@ public:
void SetModule(const lldb::ModuleSP &module) { m_module_wp = module; }
- bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx,
- lldb::addr_t func_load_addr, const Value *initial_value_ptr,
- const Value *object_address_ptr, Value &result,
- Status *error_ptr) const;
+ llvm::Expected<Value> Evaluate(ExecutionContext *exe_ctx,
+ RegisterContext *reg_ctx,
+ lldb::addr_t func_load_addr,
+ const Value *initial_value_ptr,
+ const Value *object_address_ptr) const;
private:
// RangeDataVector requires a comparator for DWARFExpression, but it doesn't
diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp
index 1443d9d..c5c434a 100644
--- a/lldb/source/Core/ValueObject.cpp
+++ b/lldb/source/Core/ValueObject.cpp
@@ -216,7 +216,7 @@ bool ValueObject::UpdateFormatsIfNeeded() {
m_last_format_mgr_revision = DataVisualization::GetCurrentRevision();
any_change = true;
- SetValueFormat(DataVisualization::GetFormat(*this, eNoDynamicValues));
+ SetValueFormat(DataVisualization::GetFormat(*this, GetDynamicValueType()));
SetSummaryFormat(
DataVisualization::GetSummaryFormat(*this, GetDynamicValueType()));
SetSyntheticChildren(
diff --git a/lldb/source/Core/ValueObjectVariable.cpp b/lldb/source/Core/ValueObjectVariable.cpp
index 67d71c9..51eb11d 100644
--- a/lldb/source/Core/ValueObjectVariable.cpp
+++ b/lldb/source/Core/ValueObjectVariable.cpp
@@ -164,8 +164,11 @@ bool ValueObjectVariable::UpdateValue() {
target);
}
Value old_value(m_value);
- if (expr_list.Evaluate(&exe_ctx, nullptr, loclist_base_load_addr, nullptr,
- nullptr, m_value, &m_error)) {
+ llvm::Expected<Value> maybe_value = expr_list.Evaluate(
+ &exe_ctx, nullptr, loclist_base_load_addr, nullptr, nullptr);
+
+ if (maybe_value) {
+ m_value = *maybe_value;
m_resolved_value = m_value;
m_value.SetContext(Value::ContextType::Variable, variable);
@@ -246,6 +249,7 @@ bool ValueObjectVariable::UpdateValue() {
SetValueIsValid(m_error.Success());
} else {
+ m_error = maybe_value.takeError();
// could not find location, won't allow editing
m_resolved_value.SetContext(Value::ContextType::Invalid, nullptr);
}
diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp
index 7473bb8..05767a8 100644
--- a/lldb/source/Expression/DWARFExpression.cpp
+++ b/lldb/source/Expression/DWARFExpression.cpp
@@ -94,51 +94,38 @@ void DWARFExpression::SetRegisterKind(RegisterKind reg_kind) {
m_reg_kind = reg_kind;
}
-
-static bool ReadRegisterValueAsScalar(RegisterContext *reg_ctx,
- lldb::RegisterKind reg_kind,
- uint32_t reg_num, Status *error_ptr,
- Value &value) {
- if (reg_ctx == nullptr) {
- if (error_ptr)
- error_ptr->SetErrorString("No register context in frame.\n");
- } else {
- uint32_t native_reg =
- reg_ctx->ConvertRegisterKindToRegisterNumber(reg_kind, reg_num);
- if (native_reg == LLDB_INVALID_REGNUM) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat("Unable to convert register "
- "kind=%u reg_num=%u to a native "
- "register number.\n",
- reg_kind, reg_num);
- } else {
- const RegisterInfo *reg_info =
- reg_ctx->GetRegisterInfoAtIndex(native_reg);
- RegisterValue reg_value;
- if (reg_ctx->ReadRegister(reg_info, reg_value)) {
- if (reg_value.GetScalarValue(value.GetScalar())) {
- value.SetValueType(Value::ValueType::Scalar);
- value.SetContext(Value::ContextType::RegisterInfo,
- const_cast<RegisterInfo *>(reg_info));
- if (error_ptr)
- error_ptr->Clear();
- return true;
- } else {
- // If we get this error, then we need to implement a value buffer in
- // the dwarf expression evaluation function...
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "register %s can't be converted to a scalar value",
- reg_info->name);
- }
- } else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat("register %s is not available",
- reg_info->name);
- }
+static llvm::Error ReadRegisterValueAsScalar(RegisterContext *reg_ctx,
+ lldb::RegisterKind reg_kind,
+ uint32_t reg_num, Value &value) {
+ if (reg_ctx == nullptr)
+ return llvm::createStringError("no register context in frame");
+
+ const uint32_t native_reg =
+ reg_ctx->ConvertRegisterKindToRegisterNumber(reg_kind, reg_num);
+ if (native_reg == LLDB_INVALID_REGNUM)
+ return llvm::createStringError(
+ "unable to convert register kind=%u reg_num=%u to a native "
+ "register number",
+ reg_kind, reg_num);
+
+ const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoAtIndex(native_reg);
+ RegisterValue reg_value;
+ if (reg_ctx->ReadRegister(reg_info, reg_value)) {
+ if (reg_value.GetScalarValue(value.GetScalar())) {
+ value.SetValueType(Value::ValueType::Scalar);
+ value.SetContext(Value::ContextType::RegisterInfo,
+ const_cast<RegisterInfo *>(reg_info));
+ return llvm::Error::success();
}
+
+ // If we get this error, then we need to implement a value buffer in
+ // the dwarf expression evaluation function...
+ return llvm::createStringError(
+ "register %s can't be converted to a scalar value", reg_info->name);
}
- return false;
+
+ return llvm::createStringError("register %s is not available",
+ reg_info->name);
}
/// Return the length in bytes of the set of operands for \p op. No guarantees
@@ -541,12 +528,12 @@ bool DWARFExpression::LinkThreadLocalStorage(
return true;
}
-static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack,
- ExecutionContext *exe_ctx,
- RegisterContext *reg_ctx,
- const DataExtractor &opcodes,
- lldb::offset_t &opcode_offset,
- Status *error_ptr, Log *log) {
+static llvm::Error Evaluate_DW_OP_entry_value(std::vector<Value> &stack,
+ ExecutionContext *exe_ctx,
+ RegisterContext *reg_ctx,
+ const DataExtractor &opcodes,
+ lldb::offset_t &opcode_offset,
+ Log *log) {
// DW_OP_entry_value(sub-expr) describes the location a variable had upon
// function entry: this variable location is presumed to be optimized out at
// the current PC value. The caller of the function may have call site
@@ -593,16 +580,13 @@ static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack,
// 1. Find the function which pushed the current frame onto the stack.
if ((!exe_ctx || !exe_ctx->HasTargetScope()) || !reg_ctx) {
- LLDB_LOG(log, "Evaluate_DW_OP_entry_value: no exe/reg context");
- return false;
+ return llvm::createStringError("no exe/reg context");
}
StackFrame *current_frame = exe_ctx->GetFramePtr();
Thread *thread = exe_ctx->GetThreadPtr();
- if (!current_frame || !thread) {
- LLDB_LOG(log, "Evaluate_DW_OP_entry_value: no current frame/thread");
- return false;
- }
+ if (!current_frame || !thread)
+ return llvm::createStringError("no current frame/thread");
Target &target = exe_ctx->GetTargetRef();
StackFrameSP parent_frame = nullptr;
@@ -620,9 +604,7 @@ static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack,
// parent frame.
if (return_pc == LLDB_INVALID_ADDRESS) {
return_pc = parent_frame->GetFrameCodeAddress().GetLoadAddress(&target);
- LLDB_LOG(log,
- "Evaluate_DW_OP_entry_value: immediate ancestor with pc = {0:x}",
- return_pc);
+ LLDB_LOG(log, "immediate ancestor with pc = {0:x}", return_pc);
}
// If we've found an inlined frame, skip it (these have no call site
@@ -634,25 +616,20 @@ static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack,
break;
}
if (!parent_frame || !parent_frame->GetRegisterContext()) {
- LLDB_LOG(log, "Evaluate_DW_OP_entry_value: no parent frame with reg ctx");
- return false;
+ return llvm::createStringError("no parent frame with reg ctx");
}
Function *parent_func =
parent_frame->GetSymbolContext(eSymbolContextFunction).function;
- if (!parent_func) {
- LLDB_LOG(log, "Evaluate_DW_OP_entry_value: no parent function");
- return false;
- }
+ if (!parent_func)
+ return llvm::createStringError("no parent function");
// 2. Find the call edge in the parent function responsible for creating the
// current activation.
Function *current_func =
current_frame->GetSymbolContext(eSymbolContextFunction).function;
- if (!current_func) {
- LLDB_LOG(log, "Evaluate_DW_OP_entry_value: no current function");
- return false;
- }
+ if (!current_func)
+ return llvm::createStringError("no current function");
CallEdge *call_edge = nullptr;
ModuleList &modlist = target.GetImages();
@@ -663,17 +640,14 @@ static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack,
// produced by an ambiguous tail call. In this case, refuse to proceed.
call_edge = parent_func->GetCallEdgeForReturnAddress(return_pc, target);
if (!call_edge) {
- LLDB_LOG(log,
- "Evaluate_DW_OP_entry_value: no call edge for retn-pc = {0:x} "
- "in parent frame {1}",
- return_pc, parent_func->GetName());
- return false;
+ return llvm::createStringError(
+ llvm::formatv("no call edge for retn-pc = {0:x} in parent frame {1}",
+ return_pc, parent_func->GetName()));
}
Function *callee_func = call_edge->GetCallee(modlist, parent_exe_ctx);
if (callee_func != current_func) {
- LLDB_LOG(log, "Evaluate_DW_OP_entry_value: ambiguous call sequence, "
- "can't find real parent frame");
- return false;
+ return llvm::createStringError(
+ "ambiguous call sequence, can't find real parent frame");
}
} else {
// The StackFrameList solver machinery has deduced that an unambiguous tail
@@ -686,21 +660,17 @@ static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack,
}
}
}
- if (!call_edge) {
- LLDB_LOG(log, "Evaluate_DW_OP_entry_value: no unambiguous edge from parent "
- "to current function");
- return false;
- }
+ if (!call_edge)
+ return llvm::createStringError("no unambiguous edge from parent "
+ "to current function");
// 3. Attempt to locate the DW_OP_entry_value expression in the set of
// available call site parameters. If found, evaluate the corresponding
// parameter in the context of the parent frame.
const uint32_t subexpr_len = opcodes.GetULEB128(&opcode_offset);
const void *subexpr_data = opcodes.GetData(&opcode_offset, subexpr_len);
- if (!subexpr_data) {
- LLDB_LOG(log, "Evaluate_DW_OP_entry_value: subexpr could not be read");
- return false;
- }
+ if (!subexpr_data)
+ return llvm::createStringError("subexpr could not be read");
const CallSiteParameter *matched_param = nullptr;
for (const CallSiteParameter &param : call_edge->GetCallSiteParameters()) {
@@ -726,28 +696,26 @@ static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack,
break;
}
}
- if (!matched_param) {
- LLDB_LOG(log,
- "Evaluate_DW_OP_entry_value: no matching call site param found");
- return false;
- }
+ if (!matched_param)
+ return llvm::createStringError("no matching call site param found");
// TODO: Add support for DW_OP_push_object_address within a DW_OP_entry_value
// subexpresion whenever llvm does.
- Value result;
const DWARFExpressionList &param_expr = matched_param->LocationInCaller;
- if (!param_expr.Evaluate(&parent_exe_ctx,
- parent_frame->GetRegisterContext().get(),
- LLDB_INVALID_ADDRESS,
- /*initial_value_ptr=*/nullptr,
- /*object_address_ptr=*/nullptr, result, error_ptr)) {
+
+ llvm::Expected<Value> maybe_result = param_expr.Evaluate(
+ &parent_exe_ctx, parent_frame->GetRegisterContext().get(),
+ LLDB_INVALID_ADDRESS,
+ /*initial_value_ptr=*/nullptr,
+ /*object_address_ptr=*/nullptr);
+ if (!maybe_result) {
LLDB_LOG(log,
"Evaluate_DW_OP_entry_value: call site param evaluation failed");
- return false;
+ return maybe_result.takeError();
}
- stack.push_back(result);
- return true;
+ stack.push_back(*maybe_result);
+ return llvm::Error::success();
}
namespace {
@@ -801,7 +769,6 @@ void UpdateValueTypeFromLocationDescription(Log *log, const DWARFUnit *dwarf_cu,
///
/// \param exe_ctx Pointer to the execution context
/// \param module_sp shared_ptr contains the module if we have one
-/// \param error_ptr pointer to Status object if we have one
/// \param dw_op_type C-style string used to vary the error output
/// \param file_addr the file address we are trying to resolve and turn into a
/// load address
@@ -812,32 +779,22 @@ void UpdateValueTypeFromLocationDescription(Log *log, const DWARFUnit *dwarf_cu,
/// the load address succeed or an empty Optinal otherwise. If
/// check_sectionoffset is true we consider LLDB_INVALID_ADDRESS a
/// success if so_addr.IsSectionOffset() is true.
-static std::optional<lldb::addr_t>
+static llvm::Expected<lldb::addr_t>
ResolveLoadAddress(ExecutionContext *exe_ctx, lldb::ModuleSP &module_sp,
- Status *error_ptr, const char *dw_op_type,
- lldb::addr_t file_addr, Address &so_addr,
- bool check_sectionoffset = false) {
- if (!module_sp) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "need module to resolve file address for %s", dw_op_type);
- return {};
- }
+ const char *dw_op_type, lldb::addr_t file_addr,
+ Address &so_addr, bool check_sectionoffset = false) {
+ if (!module_sp)
+ return llvm::createStringError("need module to resolve file address for %s",
+ dw_op_type);
- if (!module_sp->ResolveFileAddress(file_addr, so_addr)) {
- if (error_ptr)
- error_ptr->SetErrorString("failed to resolve file address in module");
- return {};
- }
+ if (!module_sp->ResolveFileAddress(file_addr, so_addr))
+ return llvm::createStringError("failed to resolve file address in module");
- addr_t load_addr = so_addr.GetLoadAddress(exe_ctx->GetTargetPtr());
+ const addr_t load_addr = so_addr.GetLoadAddress(exe_ctx->GetTargetPtr());
if (load_addr == LLDB_INVALID_ADDRESS &&
- (check_sectionoffset && !so_addr.IsSectionOffset())) {
- if (error_ptr)
- error_ptr->SetErrorString("failed to resolve load address");
- return {};
- }
+ (check_sectionoffset && !so_addr.IsSectionOffset()))
+ return llvm::createStringError("failed to resolve load address");
return load_addr;
}
@@ -862,19 +819,15 @@ static Scalar DerefSizeExtractDataHelper(uint8_t *addr_bytes,
return addr_data.GetAddress(&addr_data_offset);
}
-bool DWARFExpression::Evaluate(
+llvm::Expected<Value> DWARFExpression::Evaluate(
ExecutionContext *exe_ctx, RegisterContext *reg_ctx,
lldb::ModuleSP module_sp, const DataExtractor &opcodes,
const DWARFUnit *dwarf_cu, const lldb::RegisterKind reg_kind,
- const Value *initial_value_ptr, const Value *object_address_ptr,
- Value &result, Status *error_ptr) {
+ const Value *initial_value_ptr, const Value *object_address_ptr) {
- if (opcodes.GetByteSize() == 0) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "no location, value may have been optimized out");
- return false;
- }
+ if (opcodes.GetByteSize() == 0)
+ return llvm::createStringError(
+ "no location, value may have been optimized out");
std::vector<Value> stack;
Process *process = nullptr;
@@ -994,11 +947,9 @@ bool DWARFExpression::Evaluate(
// retrieved from the dereferenced address is the size of an address on the
// target machine.
case DW_OP_deref: {
- if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString("Expression stack empty for DW_OP_deref.");
- return false;
- }
+ if (stack.empty())
+ return llvm::createStringError(
+ "expression stack empty for DW_OP_deref");
Value::ValueType value_type = stack.back().GetValueType();
switch (value_type) {
case Value::ValueType::HostAddress: {
@@ -1014,10 +965,10 @@ bool DWARFExpression::Evaluate(
Address so_addr;
auto maybe_load_addr = ResolveLoadAddress(
- exe_ctx, module_sp, error_ptr, "DW_OP_deref", file_addr, so_addr);
+ exe_ctx, module_sp, "DW_OP_deref", file_addr, so_addr);
if (!maybe_load_addr)
- return false;
+ return maybe_load_addr.takeError();
stack.back().GetScalar() = *maybe_load_addr;
// Fall through to load address promotion code below.
@@ -1041,30 +992,22 @@ bool DWARFExpression::Evaluate(
stack.back().GetScalar() = pointer_value;
stack.back().ClearContext();
} else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "Failed to dereference pointer from 0x%" PRIx64
- " for DW_OP_deref: %s\n",
- pointer_addr, error.AsCString());
- return false;
+ return llvm::createStringError(
+ "Failed to dereference pointer from 0x%" PRIx64
+ " for DW_OP_deref: %s\n",
+ pointer_addr, error.AsCString());
}
} else {
- if (error_ptr)
- error_ptr->SetErrorString("NULL process for DW_OP_deref.\n");
- return false;
+ return llvm::createStringError("NULL process for DW_OP_deref");
}
} else {
- if (error_ptr)
- error_ptr->SetErrorString(
- "NULL execution context for DW_OP_deref.\n");
- return false;
+ return llvm::createStringError(
+ "NULL execution context for DW_OP_deref");
}
break;
case Value::ValueType::Invalid:
- if (error_ptr)
- error_ptr->SetErrorString("Invalid value type for DW_OP_deref.\n");
- return false;
+ return llvm::createStringError("invalid value type for DW_OP_deref");
}
} break;
@@ -1083,18 +1026,13 @@ bool DWARFExpression::Evaluate(
// expression stack.
case DW_OP_deref_size: {
if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack empty for DW_OP_deref_size.");
- return false;
+ return llvm::createStringError(
+ "expression stack empty for DW_OP_deref_size");
}
uint8_t size = opcodes.GetU8(&offset);
if (size > 8) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "Invalid address size for DW_OP_deref_size: %d\n",
- size);
- return false;
+ return llvm::createStringError(
+ "Invalid address size for DW_OP_deref_size: %d\n", size);
}
Value::ValueType value_type = stack.back().GetValueType();
switch (value_type) {
@@ -1142,13 +1080,12 @@ bool DWARFExpression::Evaluate(
auto file_addr =
stack.back().GetScalar().ULongLong(LLDB_INVALID_ADDRESS);
Address so_addr;
- auto maybe_load_addr =
- ResolveLoadAddress(exe_ctx, module_sp, error_ptr,
- "DW_OP_deref_size", file_addr, so_addr,
- /*check_sectionoffset=*/true);
+ auto maybe_load_addr = ResolveLoadAddress(
+ exe_ctx, module_sp, "DW_OP_deref_size", file_addr, so_addr,
+ /*check_sectionoffset=*/true);
if (!maybe_load_addr)
- return false;
+ return maybe_load_addr.takeError();
addr_t load_addr = *maybe_load_addr;
@@ -1166,12 +1103,10 @@ bool DWARFExpression::Evaluate(
stack.back().ClearContext();
break;
} else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "Failed to dereference pointer for DW_OP_deref_size: "
- "%s\n",
- error.AsCString());
- return false;
+ return llvm::createStringError(
+ "Failed to dereference pointer for DW_OP_deref_size: "
+ "%s\n",
+ error.AsCString());
}
}
stack.back().GetScalar() = load_addr;
@@ -1195,30 +1130,24 @@ bool DWARFExpression::Evaluate(
process->GetByteOrder(), size);
stack.back().ClearContext();
} else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "Failed to dereference pointer from 0x%" PRIx64
- " for DW_OP_deref: %s\n",
- pointer_addr, error.AsCString());
- return false;
+ return llvm::createStringError(
+ "Failed to dereference pointer from 0x%" PRIx64
+ " for DW_OP_deref: %s\n",
+ pointer_addr, error.AsCString());
}
} else {
- if (error_ptr)
- error_ptr->SetErrorString("NULL process for DW_OP_deref_size.\n");
- return false;
+
+ return llvm::createStringError("NULL process for DW_OP_deref_size");
}
} else {
- if (error_ptr)
- error_ptr->SetErrorString(
- "NULL execution context for DW_OP_deref_size.\n");
- return false;
+ return llvm::createStringError(
+ "NULL execution context for DW_OP_deref_size");
}
break;
case Value::ValueType::Invalid:
- if (error_ptr)
- error_ptr->SetErrorString("Invalid value for DW_OP_deref_size.\n");
- return false;
+
+ return llvm::createStringError("invalid value for DW_OP_deref_size");
}
} break;
@@ -1239,9 +1168,7 @@ bool DWARFExpression::Evaluate(
// extended to the size of an address on the target machine before being
// pushed on the expression stack.
case DW_OP_xderef_size:
- if (error_ptr)
- error_ptr->SetErrorString("Unimplemented opcode: DW_OP_xderef_size.");
- return false;
+ return llvm::createStringError("unimplemented opcode: DW_OP_xderef_size");
// OPCODE: DW_OP_xderef
// OPERANDS: none
// DESCRIPTION: Provides an extended dereference mechanism. The entry at
@@ -1253,9 +1180,7 @@ bool DWARFExpression::Evaluate(
// retrieved from the dereferenced address is the size of an address on the
// target machine.
case DW_OP_xderef:
- if (error_ptr)
- error_ptr->SetErrorString("Unimplemented opcode: DW_OP_xderef.");
- return false;
+ return llvm::createStringError("unimplemented opcode: DW_OP_xderef");
// All DW_OP_constXXX opcodes have a single operand as noted below:
//
@@ -1308,9 +1233,7 @@ bool DWARFExpression::Evaluate(
// DESCRIPTION: duplicates the value at the top of the stack
case DW_OP_dup:
if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString("Expression stack empty for DW_OP_dup.");
- return false;
+ return llvm::createStringError("expression stack empty for DW_OP_dup");
} else
stack.push_back(stack.back());
break;
@@ -1320,9 +1243,7 @@ bool DWARFExpression::Evaluate(
// DESCRIPTION: pops the value at the top of the stack
case DW_OP_drop:
if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString("Expression stack empty for DW_OP_drop.");
- return false;
+ return llvm::createStringError("expression stack empty for DW_OP_drop");
} else
stack.pop_back();
break;
@@ -1333,10 +1254,8 @@ bool DWARFExpression::Evaluate(
// the top of the stack.
case DW_OP_over:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_over.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_over");
} else
stack.push_back(stack[stack.size() - 2]);
break;
@@ -1350,10 +1269,8 @@ bool DWARFExpression::Evaluate(
if (pick_idx < stack.size())
stack.push_back(stack[stack.size() - 1 - pick_idx]);
else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "Index %u out of range for DW_OP_pick.\n", pick_idx);
- return false;
+ return llvm::createStringError(
+ "Index %u out of range for DW_OP_pick.\n", pick_idx);
}
} break;
@@ -1364,10 +1281,8 @@ bool DWARFExpression::Evaluate(
// becomes the top of the stack
case DW_OP_swap:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_swap.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_swap");
} else {
tmp = stack.back();
stack.back() = stack[stack.size() - 2];
@@ -1383,10 +1298,8 @@ bool DWARFExpression::Evaluate(
// entry.
case DW_OP_rot:
if (stack.size() < 3) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 3 items for DW_OP_rot.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 3 items for DW_OP_rot");
} else {
size_t last_idx = stack.size() - 1;
Value old_top = stack[last_idx];
@@ -1403,15 +1316,11 @@ bool DWARFExpression::Evaluate(
// represented, the result is undefined.
case DW_OP_abs:
if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 1 item for DW_OP_abs.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 1 item for DW_OP_abs");
} else if (!stack.back().ResolveValue(exe_ctx).AbsoluteValue()) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Failed to take the absolute value of the first stack item.");
- return false;
+ return llvm::createStringError(
+ "failed to take the absolute value of the first stack item");
}
break;
@@ -1421,10 +1330,8 @@ bool DWARFExpression::Evaluate(
// operation on the two, and pushes the result.
case DW_OP_and:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_and.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_and");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1440,30 +1347,23 @@ bool DWARFExpression::Evaluate(
// the result.
case DW_OP_div:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_div.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_div");
} else {
tmp = stack.back();
- if (tmp.ResolveValue(exe_ctx).IsZero()) {
- if (error_ptr)
- error_ptr->SetErrorString("Divide by zero.");
- return false;
- } else {
- stack.pop_back();
- Scalar divisor, dividend;
- divisor = tmp.ResolveValue(exe_ctx);
- dividend = stack.back().ResolveValue(exe_ctx);
- divisor.MakeSigned();
- dividend.MakeSigned();
- stack.back() = dividend / divisor;
- if (!stack.back().ResolveValue(exe_ctx).IsValid()) {
- if (error_ptr)
- error_ptr->SetErrorString("Divide failed.");
- return false;
- }
- }
+ if (tmp.ResolveValue(exe_ctx).IsZero())
+ return llvm::createStringError("divide by zero");
+
+ stack.pop_back();
+ Scalar divisor, dividend;
+ divisor = tmp.ResolveValue(exe_ctx);
+ dividend = stack.back().ResolveValue(exe_ctx);
+ divisor.MakeSigned();
+ dividend.MakeSigned();
+ stack.back() = dividend / divisor;
+
+ if (!stack.back().ResolveValue(exe_ctx).IsValid())
+ return llvm::createStringError("divide failed");
}
break;
@@ -1473,10 +1373,8 @@ bool DWARFExpression::Evaluate(
// of the stack from the former second entry, and pushes the result.
case DW_OP_minus:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_minus.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_minus");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1492,10 +1390,8 @@ bool DWARFExpression::Evaluate(
// stack.
case DW_OP_mod:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_mod.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_mod");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1510,10 +1406,8 @@ bool DWARFExpression::Evaluate(
// together, and pushes the result.
case DW_OP_mul:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_mul.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_mul");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1527,16 +1421,11 @@ bool DWARFExpression::Evaluate(
// DESCRIPTION: pops the top stack entry, and pushes its negation.
case DW_OP_neg:
if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 1 item for DW_OP_neg.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 1 item for DW_OP_neg");
} else {
- if (!stack.back().ResolveValue(exe_ctx).UnaryNegate()) {
- if (error_ptr)
- error_ptr->SetErrorString("Unary negate failed.");
- return false;
- }
+ if (!stack.back().ResolveValue(exe_ctx).UnaryNegate())
+ return llvm::createStringError("unary negate failed");
}
break;
@@ -1546,15 +1435,11 @@ bool DWARFExpression::Evaluate(
// complement
case DW_OP_not:
if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 1 item for DW_OP_not.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 1 item for DW_OP_not");
} else {
if (!stack.back().ResolveValue(exe_ctx).OnesComplement()) {
- if (error_ptr)
- error_ptr->SetErrorString("Logical NOT failed.");
- return false;
+ return llvm::createStringError("logical NOT failed");
}
}
break;
@@ -1565,10 +1450,8 @@ bool DWARFExpression::Evaluate(
// operation on the two, and pushes the result.
case DW_OP_or:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_or.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_or");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1583,10 +1466,8 @@ bool DWARFExpression::Evaluate(
// pushes the result.
case DW_OP_plus:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_plus.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_plus");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1600,19 +1481,14 @@ bool DWARFExpression::Evaluate(
// constant operand and pushes the result.
case DW_OP_plus_uconst:
if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 1 item for DW_OP_plus_uconst.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 1 item for DW_OP_plus_uconst");
} else {
const uint64_t uconst_value = opcodes.GetULEB128(&offset);
// Implicit conversion from a UINT to a Scalar...
stack.back().GetScalar() += uconst_value;
- if (!stack.back().GetScalar().IsValid()) {
- if (error_ptr)
- error_ptr->SetErrorString("DW_OP_plus_uconst failed.");
- return false;
- }
+ if (!stack.back().GetScalar().IsValid())
+ return llvm::createStringError("DW_OP_plus_uconst failed");
}
break;
@@ -1623,10 +1499,8 @@ bool DWARFExpression::Evaluate(
// the stack, and pushes the result.
case DW_OP_shl:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_shl.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_shl");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1641,18 +1515,14 @@ bool DWARFExpression::Evaluate(
// specified by the former top of the stack, and pushes the result.
case DW_OP_shr:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_shr.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_shr");
} else {
tmp = stack.back();
stack.pop_back();
if (!stack.back().ResolveValue(exe_ctx).ShiftRightLogical(
tmp.ResolveValue(exe_ctx))) {
- if (error_ptr)
- error_ptr->SetErrorString("DW_OP_shr failed.");
- return false;
+ return llvm::createStringError("DW_OP_shr failed");
}
}
break;
@@ -1665,10 +1535,8 @@ bool DWARFExpression::Evaluate(
// of the stack, and pushes the result.
case DW_OP_shra:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_shra.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_shra");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1682,10 +1550,8 @@ bool DWARFExpression::Evaluate(
// exclusive-or operation on the two, and pushes the result.
case DW_OP_xor:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_xor.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_xor");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1709,11 +1575,9 @@ bool DWARFExpression::Evaluate(
if (new_offset <= opcodes.GetByteSize())
offset = new_offset;
else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormatv(
- "Invalid opcode offset in DW_OP_skip: {0}+({1}) > {2}", offset,
- skip_offset, opcodes.GetByteSize());
- return false;
+ return llvm::createStringError(llvm::formatv(
+ "Invalid opcode offset in DW_OP_skip: {0}+({1}) > {2}", offset,
+ skip_offset, opcodes.GetByteSize()));
}
} break;
@@ -1726,10 +1590,8 @@ bool DWARFExpression::Evaluate(
// the current operation, beginning after the 2-byte constant.
case DW_OP_bra:
if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 1 item for DW_OP_bra.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 1 item for DW_OP_bra");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1743,11 +1605,9 @@ bool DWARFExpression::Evaluate(
if (new_offset <= opcodes.GetByteSize())
offset = new_offset;
else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormatv(
- "Invalid opcode offset in DW_OP_bra: {0}+({1}) > {2}", offset,
- bra_offset, opcodes.GetByteSize());
- return false;
+ return llvm::createStringError(llvm::formatv(
+ "Invalid opcode offset in DW_OP_bra: {0}+({1}) > {2}", offset,
+ bra_offset, opcodes.GetByteSize()));
}
}
}
@@ -1762,10 +1622,8 @@ bool DWARFExpression::Evaluate(
// operation is false.
case DW_OP_eq:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_eq.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_eq");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1783,10 +1641,8 @@ bool DWARFExpression::Evaluate(
// operation is false.
case DW_OP_ge:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_ge.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_ge");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1804,10 +1660,8 @@ bool DWARFExpression::Evaluate(
// operation is false.
case DW_OP_gt:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_gt.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_gt");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1825,10 +1679,8 @@ bool DWARFExpression::Evaluate(
// operation is false.
case DW_OP_le:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_le.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_le");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1846,10 +1698,8 @@ bool DWARFExpression::Evaluate(
// operation is false.
case DW_OP_lt:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_lt.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_lt");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1867,10 +1717,8 @@ bool DWARFExpression::Evaluate(
// operation is false.
case DW_OP_ne:
if (stack.size() < 2) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 2 items for DW_OP_ne.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 2 items for DW_OP_ne");
} else {
tmp = stack.back();
stack.pop_back();
@@ -1957,10 +1805,10 @@ bool DWARFExpression::Evaluate(
dwarf4_location_description_kind = Register;
reg_num = op - DW_OP_reg0;
- if (ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, error_ptr, tmp))
- stack.push_back(tmp);
- else
- return false;
+ if (llvm::Error err =
+ ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, tmp))
+ return err;
+ stack.push_back(tmp);
} break;
// OPCODE: DW_OP_regx
// OPERANDS:
@@ -1969,10 +1817,11 @@ bool DWARFExpression::Evaluate(
case DW_OP_regx: {
dwarf4_location_description_kind = Register;
reg_num = opcodes.GetULEB128(&offset);
- if (ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, error_ptr, tmp))
- stack.push_back(tmp);
- else
- return false;
+ Status read_err;
+ if (llvm::Error err =
+ ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, tmp))
+ return err;
+ stack.push_back(tmp);
} break;
// OPCODE: DW_OP_bregN
@@ -2013,16 +1862,15 @@ bool DWARFExpression::Evaluate(
case DW_OP_breg30:
case DW_OP_breg31: {
reg_num = op - DW_OP_breg0;
-
- if (ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, error_ptr,
- tmp)) {
- int64_t breg_offset = opcodes.GetSLEB128(&offset);
- tmp.ResolveValue(exe_ctx) += (uint64_t)breg_offset;
- tmp.ClearContext();
- stack.push_back(tmp);
- stack.back().SetValueType(Value::ValueType::LoadAddress);
- } else
- return false;
+ if (llvm::Error err =
+ ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, tmp))
+ return err;
+
+ int64_t breg_offset = opcodes.GetSLEB128(&offset);
+ tmp.ResolveValue(exe_ctx) += (uint64_t)breg_offset;
+ tmp.ClearContext();
+ stack.push_back(tmp);
+ stack.back().SetValueType(Value::ValueType::LoadAddress);
} break;
// OPCODE: DW_OP_bregx
// OPERANDS: 2
@@ -2032,40 +1880,36 @@ bool DWARFExpression::Evaluate(
// N plus an offset.
case DW_OP_bregx: {
reg_num = opcodes.GetULEB128(&offset);
-
- if (ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, error_ptr,
- tmp)) {
- int64_t breg_offset = opcodes.GetSLEB128(&offset);
- tmp.ResolveValue(exe_ctx) += (uint64_t)breg_offset;
- tmp.ClearContext();
- stack.push_back(tmp);
- stack.back().SetValueType(Value::ValueType::LoadAddress);
- } else
- return false;
+ if (llvm::Error err =
+ ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, tmp))
+ return err;
+
+ int64_t breg_offset = opcodes.GetSLEB128(&offset);
+ tmp.ResolveValue(exe_ctx) += (uint64_t)breg_offset;
+ tmp.ClearContext();
+ stack.push_back(tmp);
+ stack.back().SetValueType(Value::ValueType::LoadAddress);
} break;
case DW_OP_fbreg:
if (exe_ctx) {
if (frame) {
Scalar value;
- if (frame->GetFrameBaseValue(value, error_ptr)) {
+ Status fb_err;
+ if (frame->GetFrameBaseValue(value, &fb_err)) {
int64_t fbreg_offset = opcodes.GetSLEB128(&offset);
value += fbreg_offset;
stack.push_back(value);
stack.back().SetValueType(Value::ValueType::LoadAddress);
} else
- return false;
+ return fb_err.ToError();
} else {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Invalid stack frame in context for DW_OP_fbreg opcode.");
- return false;
+ return llvm::createStringError(
+ "invalid stack frame in context for DW_OP_fbreg opcode");
}
} else {
- if (error_ptr)
- error_ptr->SetErrorString(
- "NULL execution context for DW_OP_fbreg.\n");
- return false;
+ return llvm::createStringError(
+ "NULL execution context for DW_OP_fbreg");
}
break;
@@ -2127,7 +1971,7 @@ bool DWARFExpression::Evaluate(
const lldb::addr_t addr = scalar.ULongLong(LLDB_INVALID_ADDRESS);
switch (curr_piece_source_value_type) {
case Value::ValueType::Invalid:
- return false;
+ return llvm::createStringError("invalid value type");
case Value::ValueType::LoadAddress:
case Value::ValueType::FileAddress: {
if (target) {
@@ -2136,35 +1980,28 @@ bool DWARFExpression::Evaluate(
piece_byte_size, error,
/*force_live_memory=*/false) !=
piece_byte_size) {
- if (error_ptr) {
- const char *addr_type = (curr_piece_source_value_type ==
- Value::ValueType::LoadAddress)
- ? "load"
- : "file";
- error_ptr->SetErrorStringWithFormat(
- "failed to read memory DW_OP_piece(%" PRIu64
- ") from %s address 0x%" PRIx64,
- piece_byte_size, addr_type, addr);
- }
- return false;
+ const char *addr_type = (curr_piece_source_value_type ==
+ Value::ValueType::LoadAddress)
+ ? "load"
+ : "file";
+ return llvm::createStringError(
+ "failed to read memory DW_OP_piece(%" PRIu64
+ ") from %s address 0x%" PRIx64,
+ piece_byte_size, addr_type, addr);
}
} else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "failed to resize the piece memory buffer for "
- "DW_OP_piece(%" PRIu64 ")",
- piece_byte_size);
- return false;
+ return llvm::createStringError(
+ "failed to resize the piece memory buffer for "
+ "DW_OP_piece(%" PRIu64 ")",
+ piece_byte_size);
}
}
} break;
case Value::ValueType::HostAddress: {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "failed to read memory DW_OP_piece(%" PRIu64
- ") from host address 0x%" PRIx64,
- piece_byte_size, addr);
- return false;
+ return llvm::createStringError(
+ "failed to read memory DW_OP_piece(%" PRIu64
+ ") from host address 0x%" PRIx64,
+ piece_byte_size, addr);
} break;
case Value::ValueType::Scalar: {
@@ -2172,14 +2009,11 @@ bool DWARFExpression::Evaluate(
uint32_t bit_offset = 0;
if (!scalar.ExtractBitfield(
bit_size, bit_offset)) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "unable to extract %" PRIu64 " bytes from a %" PRIu64
- " byte scalar value.",
- piece_byte_size,
- (uint64_t)curr_piece_source_value.GetScalar()
- .GetByteSize());
- return false;
+ return llvm::createStringError(
+ "unable to extract %" PRIu64 " bytes from a %" PRIu64
+ " byte scalar value.",
+ piece_byte_size,
+ (uint64_t)curr_piece_source_value.GetScalar().GetByteSize());
}
// Create curr_piece with bit_size. By default Scalar
// grows to the nearest host integer type.
@@ -2198,27 +2032,20 @@ bool DWARFExpression::Evaluate(
// so subsequent pieces will be able to access this piece and add
// to it.
if (pieces.AppendDataToHostBuffer(curr_piece) == 0) {
- if (error_ptr)
- error_ptr->SetErrorString("failed to append piece data");
- return false;
+ return llvm::createStringError("failed to append piece data");
}
} else {
// If this is the second or later piece there should be a value on
// the stack.
if (pieces.GetBuffer().GetByteSize() != op_piece_offset) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "DW_OP_piece for offset %" PRIu64
- " but top of stack is of size %" PRIu64,
- op_piece_offset, pieces.GetBuffer().GetByteSize());
- return false;
+ return llvm::createStringError(
+ "DW_OP_piece for offset %" PRIu64
+ " but top of stack is of size %" PRIu64,
+ op_piece_offset, pieces.GetBuffer().GetByteSize());
}
- if (pieces.AppendDataToHostBuffer(curr_piece) == 0) {
- if (error_ptr)
- error_ptr->SetErrorString("failed to append piece data");
- return false;
- }
+ if (pieces.AppendDataToHostBuffer(curr_piece) == 0)
+ return llvm::createStringError("failed to append piece data");
}
}
op_piece_offset += piece_byte_size;
@@ -2231,10 +2058,8 @@ bool DWARFExpression::Evaluate(
LocationDescriptionKind::Empty);
// Reset for the next piece.
dwarf4_location_description_kind = Memory;
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 1 item for DW_OP_bit_piece.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 1 item for DW_OP_bit_piece");
} else {
UpdateValueTypeFromLocationDescription(
log, dwarf_cu, dwarf4_location_description_kind, &stack.back());
@@ -2244,30 +2069,26 @@ bool DWARFExpression::Evaluate(
const uint64_t piece_bit_offset = opcodes.GetULEB128(&offset);
switch (stack.back().GetValueType()) {
case Value::ValueType::Invalid:
- return false;
+ return llvm::createStringError(
+ "unable to extract bit value from invalid value");
case Value::ValueType::Scalar: {
if (!stack.back().GetScalar().ExtractBitfield(piece_bit_size,
piece_bit_offset)) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "unable to extract %" PRIu64 " bit value with %" PRIu64
- " bit offset from a %" PRIu64 " bit scalar value.",
- piece_bit_size, piece_bit_offset,
- (uint64_t)(stack.back().GetScalar().GetByteSize() * 8));
- return false;
+ return llvm::createStringError(
+ "unable to extract %" PRIu64 " bit value with %" PRIu64
+ " bit offset from a %" PRIu64 " bit scalar value.",
+ piece_bit_size, piece_bit_offset,
+ (uint64_t)(stack.back().GetScalar().GetByteSize() * 8));
}
} break;
case Value::ValueType::FileAddress:
case Value::ValueType::LoadAddress:
case Value::ValueType::HostAddress:
- if (error_ptr) {
- error_ptr->SetErrorStringWithFormat(
- "unable to extract DW_OP_bit_piece(bit_size = %" PRIu64
- ", bit_offset = %" PRIu64 ") from an address value.",
- piece_bit_size, piece_bit_offset);
- }
- return false;
+ return llvm::createStringError(
+ "unable to extract DW_OP_bit_piece(bit_size = %" PRIu64
+ ", bit_offset = %" PRIu64 ") from an address value.",
+ piece_bit_size, piece_bit_offset);
}
}
break;
@@ -2287,9 +2108,8 @@ bool DWARFExpression::Evaluate(
if (!data) {
LLDB_LOG(log, "Evaluate_DW_OP_implicit_value: could not be read data");
- LLDB_ERRORF(error_ptr, "Could not evaluate %s.",
- DW_OP_value_to_name(op));
- return false;
+ return llvm::createStringError("could not evaluate %s",
+ DW_OP_value_to_name(op));
}
Value result(data, len);
@@ -2299,8 +2119,8 @@ bool DWARFExpression::Evaluate(
case DW_OP_implicit_pointer: {
dwarf4_location_description_kind = Implicit;
- LLDB_ERRORF(error_ptr, "Could not evaluate %s.", DW_OP_value_to_name(op));
- return false;
+ return llvm::createStringError("Could not evaluate %s.",
+ DW_OP_value_to_name(op));
}
// OPCODE: DW_OP_push_object_address
@@ -2315,10 +2135,8 @@ bool DWARFExpression::Evaluate(
if (object_address_ptr)
stack.push_back(*object_address_ptr);
else {
- if (error_ptr)
- error_ptr->SetErrorString("DW_OP_push_object_address used without "
- "specifying an object address");
- return false;
+ return llvm::createStringError("DW_OP_push_object_address used without "
+ "specifying an object address");
}
break;
@@ -2341,9 +2159,7 @@ bool DWARFExpression::Evaluate(
// the stack by the called expression may be used as return values by prior
// agreement between the calling and called expressions.
case DW_OP_call2:
- if (error_ptr)
- error_ptr->SetErrorString("Unimplemented opcode DW_OP_call2.");
- return false;
+ return llvm::createStringError("unimplemented opcode DW_OP_call2");
// OPCODE: DW_OP_call4
// OPERANDS: 1
// uint32_t compile unit relative offset of a DIE
@@ -2364,9 +2180,7 @@ bool DWARFExpression::Evaluate(
// the stack by the called expression may be used as return values by prior
// agreement between the calling and called expressions.
case DW_OP_call4:
- if (error_ptr)
- error_ptr->SetErrorString("Unimplemented opcode DW_OP_call4.");
- return false;
+ return llvm::createStringError("unimplemented opcode DW_OP_call4");
// OPCODE: DW_OP_stack_value
// OPERANDS: None
@@ -2375,12 +2189,9 @@ bool DWARFExpression::Evaluate(
// value to be used. This is the actual object value and not the location.
case DW_OP_stack_value:
dwarf4_location_description_kind = Implicit;
- if (stack.empty()) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 1 item for DW_OP_stack_value.");
- return false;
- }
+ if (stack.empty())
+ return llvm::createStringError(
+ "expression stack needs at least 1 item for DW_OP_stack_value");
stack.back().SetValueType(Value::ValueType::Scalar);
break;
@@ -2393,10 +2204,8 @@ bool DWARFExpression::Evaluate(
// different type, and push the result.
case DW_OP_convert: {
if (stack.size() < 1) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "Expression stack needs at least 1 item for DW_OP_convert.");
- return false;
+ return llvm::createStringError(
+ "expression stack needs at least 1 item for DW_OP_convert");
}
const uint64_t die_offset = opcodes.GetULEB128(&offset);
uint64_t bit_size;
@@ -2405,39 +2214,29 @@ bool DWARFExpression::Evaluate(
// The generic type has the size of an address on the target
// machine and an unspecified signedness. Scalar has no
// "unspecified signedness", so we use unsigned types.
- if (!module_sp) {
- if (error_ptr)
- error_ptr->SetErrorString("No module");
- return false;
- }
+ if (!module_sp)
+ return llvm::createStringError("no module");
sign = false;
bit_size = module_sp->GetArchitecture().GetAddressByteSize() * 8;
- if (!bit_size) {
- if (error_ptr)
- error_ptr->SetErrorString("unspecified architecture");
- return false;
- }
+ if (!bit_size)
+ return llvm::createStringError("unspecified architecture");
} else {
// Retrieve the type DIE that the value is being converted to. This
// offset is compile unit relative so we need to fix it up.
const uint64_t abs_die_offset = die_offset + dwarf_cu->GetOffset();
// FIXME: the constness has annoying ripple effects.
DWARFDIE die = const_cast<DWARFUnit *>(dwarf_cu)->GetDIE(abs_die_offset);
- if (!die) {
- if (error_ptr)
- error_ptr->SetErrorString("Cannot resolve DW_OP_convert type DIE");
- return false;
- }
+ if (!die)
+ return llvm::createStringError(
+ "cannot resolve DW_OP_convert type DIE");
uint64_t encoding =
die.GetAttributeValueAsUnsigned(DW_AT_encoding, DW_ATE_hi_user);
bit_size = die.GetAttributeValueAsUnsigned(DW_AT_byte_size, 0) * 8;
if (!bit_size)
bit_size = die.GetAttributeValueAsUnsigned(DW_AT_bit_size, 0);
- if (!bit_size) {
- if (error_ptr)
- error_ptr->SetErrorString("Unsupported type size in DW_OP_convert");
- return false;
- }
+ if (!bit_size)
+ return llvm::createStringError(
+ "unsupported type size in DW_OP_convert");
switch (encoding) {
case DW_ATE_signed:
case DW_ATE_signed_char:
@@ -2448,9 +2247,8 @@ bool DWARFExpression::Evaluate(
sign = false;
break;
default:
- if (error_ptr)
- error_ptr->SetErrorString("Unsupported encoding in DW_OP_convert");
- return false;
+ return llvm::createStringError(
+ "unsupported encoding in DW_OP_convert");
}
}
Scalar &top = stack.back().ResolveValue(exe_ctx);
@@ -2472,15 +2270,15 @@ bool DWARFExpression::Evaluate(
if (cfa != LLDB_INVALID_ADDRESS) {
stack.push_back(Scalar(cfa));
stack.back().SetValueType(Value::ValueType::LoadAddress);
- } else if (error_ptr)
- error_ptr->SetErrorString("Stack frame does not include a canonical "
- "frame address for DW_OP_call_frame_cfa "
- "opcode.");
+ } else {
+ return llvm::createStringError(
+ "stack frame does not include a canonical "
+ "frame address for DW_OP_call_frame_cfa "
+ "opcode");
+ }
} else {
- if (error_ptr)
- error_ptr->SetErrorString("Invalid stack frame in context for "
- "DW_OP_call_frame_cfa opcode.");
- return false;
+ return llvm::createStringError("unvalid stack frame in context for "
+ "DW_OP_call_frame_cfa opcode");
}
break;
@@ -2493,29 +2291,20 @@ bool DWARFExpression::Evaluate(
case DW_OP_form_tls_address:
case DW_OP_GNU_push_tls_address: {
if (stack.size() < 1) {
- if (error_ptr) {
- if (op == DW_OP_form_tls_address)
- error_ptr->SetErrorString(
- "DW_OP_form_tls_address needs an argument.");
- else
- error_ptr->SetErrorString(
- "DW_OP_GNU_push_tls_address needs an argument.");
- }
- return false;
+ if (op == DW_OP_form_tls_address)
+ return llvm::createStringError(
+ "DW_OP_form_tls_address needs an argument");
+ else
+ return llvm::createStringError(
+ "DW_OP_GNU_push_tls_address needs an argument");
}
- if (!exe_ctx || !module_sp) {
- if (error_ptr)
- error_ptr->SetErrorString("No context to evaluate TLS within.");
- return false;
- }
+ if (!exe_ctx || !module_sp)
+ return llvm::createStringError("no context to evaluate TLS within");
Thread *thread = exe_ctx->GetThreadPtr();
- if (!thread) {
- if (error_ptr)
- error_ptr->SetErrorString("No thread to evaluate TLS within.");
- return false;
- }
+ if (!thread)
+ return llvm::createStringError("no thread to evaluate TLS within");
// Lookup the TLS block address for this thread and module.
const addr_t tls_file_addr =
@@ -2523,12 +2312,9 @@ bool DWARFExpression::Evaluate(
const addr_t tls_load_addr =
thread->GetThreadLocalData(module_sp, tls_file_addr);
- if (tls_load_addr == LLDB_INVALID_ADDRESS) {
- if (error_ptr)
- error_ptr->SetErrorString(
- "No TLS data currently exists for this thread.");
- return false;
- }
+ if (tls_load_addr == LLDB_INVALID_ADDRESS)
+ return llvm::createStringError(
+ "no TLS data currently exists for this thread");
stack.back().GetScalar() = tls_load_addr;
stack.back().SetValueType(Value::ValueType::LoadAddress);
@@ -2542,12 +2328,9 @@ bool DWARFExpression::Evaluate(
// and the 0 based index is the ULEB128 encoded index.
case DW_OP_addrx:
case DW_OP_GNU_addr_index: {
- if (!dwarf_cu) {
- if (error_ptr)
- error_ptr->SetErrorString("DW_OP_GNU_addr_index found without a "
- "compile unit being specified");
- return false;
- }
+ if (!dwarf_cu)
+ return llvm::createStringError("DW_OP_GNU_addr_index found without a "
+ "compile unit being specified");
uint64_t index = opcodes.GetULEB128(&offset);
lldb::addr_t value = dwarf_cu->ReadAddressFromDebugAddrSection(index);
stack.push_back(Scalar(value));
@@ -2570,10 +2353,8 @@ bool DWARFExpression::Evaluate(
// encoded index.
case DW_OP_GNU_const_index: {
if (!dwarf_cu) {
- if (error_ptr)
- error_ptr->SetErrorString("DW_OP_GNU_const_index found without a "
- "compile unit being specified");
- return false;
+ return llvm::createStringError("DW_OP_GNU_const_index found without a "
+ "compile unit being specified");
}
uint64_t index = opcodes.GetULEB128(&offset);
lldb::addr_t value = dwarf_cu->ReadAddressFromDebugAddrSection(index);
@@ -2582,12 +2363,11 @@ bool DWARFExpression::Evaluate(
case DW_OP_GNU_entry_value:
case DW_OP_entry_value: {
- if (!Evaluate_DW_OP_entry_value(stack, exe_ctx, reg_ctx, opcodes, offset,
- error_ptr, log)) {
- LLDB_ERRORF(error_ptr, "Could not evaluate %s.",
- DW_OP_value_to_name(op));
- return false;
- }
+ if (llvm::Error err = Evaluate_DW_OP_entry_value(stack, exe_ctx, reg_ctx,
+ opcodes, offset, log))
+ return llvm::createStringError(
+ "could not evaluate DW_OP_entry_value: %s",
+ llvm::toString(std::move(err)).c_str());
break;
}
@@ -2598,23 +2378,18 @@ bool DWARFExpression::Evaluate(
break;
}
}
- if (error_ptr)
- error_ptr->SetErrorStringWithFormatv(
- "Unhandled opcode {0} in DWARFExpression", LocationAtom(op));
- return false;
+ return llvm::createStringError(llvm::formatv(
+ "Unhandled opcode {0} in DWARFExpression", LocationAtom(op)));
}
}
if (stack.empty()) {
// Nothing on the stack, check if we created a piece value from DW_OP_piece
// or DW_OP_bit_piece opcodes
- if (pieces.GetBuffer().GetByteSize()) {
- result = pieces;
- return true;
- }
- if (error_ptr)
- error_ptr->SetErrorString("Stack empty after evaluation.");
- return false;
+ if (pieces.GetBuffer().GetByteSize())
+ return pieces;
+
+ return llvm::createStringError("stack empty after evaluation");
}
UpdateValueTypeFromLocationDescription(
@@ -2631,8 +2406,7 @@ bool DWARFExpression::Evaluate(
LLDB_LOGF(log, " %s", new_value.GetData());
}
}
- result = stack.back();
- return true; // Return true on success
+ return stack.back();
}
bool DWARFExpression::ParseDWARFLocationList(
diff --git a/lldb/source/Expression/DWARFExpressionList.cpp b/lldb/source/Expression/DWARFExpressionList.cpp
index cba4e4e..7a5cf9f 100644
--- a/lldb/source/Expression/DWARFExpressionList.cpp
+++ b/lldb/source/Expression/DWARFExpressionList.cpp
@@ -198,12 +198,10 @@ void DWARFExpressionList::GetDescription(Stream *s,
}
}
-bool DWARFExpressionList::Evaluate(ExecutionContext *exe_ctx,
- RegisterContext *reg_ctx,
- lldb::addr_t func_load_addr,
- const Value *initial_value_ptr,
- const Value *object_address_ptr,
- Value &result, Status *error_ptr) const {
+llvm::Expected<Value> DWARFExpressionList::Evaluate(
+ ExecutionContext *exe_ctx, RegisterContext *reg_ctx,
+ lldb::addr_t func_load_addr, const Value *initial_value_ptr,
+ const Value *object_address_ptr) const {
ModuleSP module_sp = m_module_wp.lock();
DataExtractor data;
RegisterKind reg_kind;
@@ -217,32 +215,26 @@ bool DWARFExpressionList::Evaluate(ExecutionContext *exe_ctx,
if (exe_ctx)
frame = exe_ctx->GetFramePtr();
if (!frame)
- return false;
+ return llvm::createStringError("no frame");
RegisterContextSP reg_ctx_sp = frame->GetRegisterContext();
if (!reg_ctx_sp)
- return false;
+ return llvm::createStringError("no register context");
reg_ctx_sp->GetPCForSymbolication(pc);
}
if (!pc.IsValid()) {
- if (error_ptr)
- error_ptr->SetErrorString("Invalid PC in frame.");
- return false;
+ return llvm::createStringError("Invalid PC in frame.");
}
addr_t pc_load_addr = pc.GetLoadAddress(exe_ctx->GetTargetPtr());
const DWARFExpression *entry =
GetExpressionAtAddress(func_load_addr, pc_load_addr);
- if (!entry) {
- if (error_ptr) {
- error_ptr->SetErrorString("variable not available");
- }
- return false;
- }
+ if (!entry)
+ return llvm::createStringError("variable not available");
expr = *entry;
}
expr.GetExpressionData(data);
reg_kind = expr.GetRegisterKind();
return DWARFExpression::Evaluate(exe_ctx, reg_ctx, module_sp, data,
m_dwarf_cu, reg_kind, initial_value_ptr,
- object_address_ptr, result, error_ptr);
+ object_address_ptr);
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
index e144cf0..66db396 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
@@ -60,8 +60,6 @@ public:
virtual ConstString GetDIEClassTemplateParams(const DWARFDIE &die) = 0;
- virtual lldb_private::Type *FindDefinitionTypeForDIE(const DWARFDIE &die) = 0;
-
static std::optional<SymbolFile::ArrayInfo>
ParseChildArrayInfo(const DWARFDIE &parent_die,
const ExecutionContext *exe_ctx = nullptr);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index dc4cfc9..579a538 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -154,26 +154,6 @@ static bool TagIsRecordType(dw_tag_t tag) {
}
}
-static bool IsForwardDeclaration(const DWARFDIE &die,
- const ParsedDWARFTypeAttributes &attrs,
- LanguageType cu_language) {
- if (attrs.is_forward_declaration)
- return true;
-
- // Work around an issue with clang at the moment where forward
- // declarations for objective C classes are emitted as:
- // DW_TAG_structure_type [2]
- // DW_AT_name( "ForwardObjcClass" )
- // DW_AT_byte_size( 0x00 )
- // DW_AT_decl_file( "..." )
- // DW_AT_decl_line( 1 )
- //
- // Note that there is no DW_AT_declaration and there are no children,
- // and the byte size is zero.
- return attrs.byte_size && *attrs.byte_size == 0 && attrs.name &&
- !die.HasChildren() && cu_language == eLanguageTypeObjC;
-}
-
TypeSP DWARFASTParserClang::ParseTypeFromClangModule(const SymbolContext &sc,
const DWARFDIE &die,
Log *log) {
@@ -269,9 +249,11 @@ static void ForcefullyCompleteType(CompilerType type) {
/// This function serves a similar purpose as RequireCompleteType above, but it
/// avoids completing the type if it is not immediately necessary. It only
/// ensures we _can_ complete the type later.
-void DWARFASTParserClang::PrepareContextToReceiveMembers(
- clang::DeclContext *decl_ctx, const DWARFDIE &decl_ctx_die,
- const DWARFDIE &die, const char *type_name_cstr) {
+static void PrepareContextToReceiveMembers(TypeSystemClang &ast,
+ ClangASTImporter &ast_importer,
+ clang::DeclContext *decl_ctx,
+ DWARFDIE die,
+ const char *type_name_cstr) {
auto *tag_decl_ctx = clang::dyn_cast<clang::TagDecl>(decl_ctx);
if (!tag_decl_ctx)
return; // Non-tag context are always ready.
@@ -286,8 +268,7 @@ void DWARFASTParserClang::PrepareContextToReceiveMembers(
// gmodules case), we can complete the type by doing a full import.
// If this type was not imported from an external AST, there's nothing to do.
- CompilerType type = m_ast.GetTypeForDecl(tag_decl_ctx);
- ClangASTImporter &ast_importer = GetClangASTImporter();
+ CompilerType type = ast.GetTypeForDecl(tag_decl_ctx);
if (type && ast_importer.CanImport(type)) {
auto qual_type = ClangUtil::GetQualType(type);
if (ast_importer.RequireCompleteType(qual_type))
@@ -298,13 +279,6 @@ void DWARFASTParserClang::PrepareContextToReceiveMembers(
type_name_cstr ? type_name_cstr : "", die.GetOffset());
}
- // By searching for the definition DIE of the decl_ctx type, we will either:
- // 1. Found the the definition DIE and start its definition with
- // TypeSystemClang::StartTagDeclarationDefinition.
- // 2. Unable to find it, then need to forcefully complete it.
- FindDefinitionTypeForDIE(decl_ctx_die);
- if (tag_decl_ctx->isCompleteDefinition() || tag_decl_ctx->isBeingDefined())
- return;
// We don't have a type definition and/or the import failed. We must
// forcefully complete the type to avoid crashes.
ForcefullyCompleteType(type);
@@ -572,6 +546,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
static std::optional<uint32_t>
ExtractDataMemberLocation(DWARFDIE const &die, DWARFFormValue const &form_value,
ModuleSP module_sp) {
+ Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups);
+
// With DWARF 3 and later, if the value is an integer constant,
// this form value is the offset in bytes from the beginning of
// the containing entity.
@@ -579,21 +555,23 @@ ExtractDataMemberLocation(DWARFDIE const &die, DWARFFormValue const &form_value,
return form_value.Unsigned();
Value initialValue(0);
- Value memberOffset(0);
const DWARFDataExtractor &debug_info_data = die.GetData();
uint32_t block_length = form_value.Unsigned();
uint32_t block_offset =
form_value.BlockData() - debug_info_data.GetDataStart();
- if (!DWARFExpression::Evaluate(
- nullptr, // ExecutionContext *
- nullptr, // RegisterContext *
- module_sp, DataExtractor(debug_info_data, block_offset, block_length),
- die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, memberOffset,
- nullptr)) {
+
+ llvm::Expected<Value> memberOffset = DWARFExpression::Evaluate(
+ /*ExecutionContext=*/nullptr,
+ /*RegisterContext=*/nullptr, module_sp,
+ DataExtractor(debug_info_data, block_offset, block_length), die.GetCU(),
+ eRegisterKindDWARF, &initialValue, nullptr);
+ if (!memberOffset) {
+ LLDB_LOG_ERROR(log, memberOffset.takeError(),
+ "ExtractDataMemberLocation failed: {0}");
return {};
}
- return memberOffset.ResolveValue(nullptr).UInt();
+ return memberOffset->ResolveValue(nullptr).UInt();
}
static TypePayloadClang GetPtrAuthMofidierPayload(const DWARFDIE &die) {
@@ -646,11 +624,10 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc,
if (tag == DW_TAG_typedef) {
// DeclContext will be populated when the clang type is materialized in
// Type::ResolveCompilerType.
- DWARFDIE decl_ctx_die;
- clang::DeclContext *decl_ctx =
- GetClangDeclContextContainingDIE(die, &decl_ctx_die);
- PrepareContextToReceiveMembers(decl_ctx, decl_ctx_die, die,
- attrs.name.GetCString());
+ PrepareContextToReceiveMembers(
+ m_ast, GetClangASTImporter(),
+ GetClangDeclContextContainingDIE(die, nullptr), die,
+ attrs.name.GetCString());
if (attrs.type.IsValid()) {
// Try to parse a typedef from the (DWARF embedded in the) Clang
@@ -1130,6 +1107,32 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die,
// struct and see if this is actually a C++ method
Type *class_type = dwarf->ResolveType(decl_ctx_die);
if (class_type) {
+ if (class_type->GetID() != decl_ctx_die.GetID() ||
+ IsClangModuleFwdDecl(decl_ctx_die)) {
+
+ // We uniqued the parent class of this function to another
+ // class so we now need to associate all dies under
+ // "decl_ctx_die" to DIEs in the DIE for "class_type"...
+ DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID());
+
+ if (class_type_die) {
+ std::vector<DWARFDIE> failures;
+
+ CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die,
+ class_type, failures);
+
+ // FIXME do something with these failures that's
+ // smarter than just dropping them on the ground.
+ // Unfortunately classes don't like having stuff added
+ // to them after their definitions are complete...
+
+ Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
+ if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
+ return type_ptr->shared_from_this();
+ }
+ }
+ }
+
if (attrs.specification.IsValid()) {
// We have a specification which we are going to base our
// function prototype off of, so we need this type to be
@@ -1264,39 +1267,6 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die,
}
}
}
- // By here, we should have already completed the c++ class_type
- // because if either specification or abstract_origin is present, we
- // call GetClangDeclContextForDIE to resolve the DW_TAG_subprogram
- // refered by this one until we reached the DW_TAG_subprogram without
- // specification or abstract_origin (the else branch above). Then the
- // above GetFullCompilerType() will complete the class_type if it's
- // not completed yet. After that, we will have the mapping from DIEs
- // in class_type_die to DeclContexts in m_die_to_decl_ctx.
- if (class_type->GetID() != decl_ctx_die.GetID() ||
- IsClangModuleFwdDecl(decl_ctx_die)) {
-
- // We uniqued the parent class of this function to another
- // class so we now need to associate all dies under
- // "decl_ctx_die" to DIEs in the DIE for "class_type"...
- DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID());
-
- if (class_type_die) {
- std::vector<DWARFDIE> failures;
-
- CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die,
- class_type, failures);
-
- // FIXME do something with these failures that's
- // smarter than just dropping them on the ground.
- // Unfortunately classes don't like having stuff added
- // to them after their definitions are complete...
-
- Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
- if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
- return type_ptr->shared_from_this();
- }
- }
- }
}
}
}
@@ -1669,93 +1639,6 @@ DWARFASTParserClang::GetCPlusPlusQualifiedName(const DWARFDIE &die) {
return qualified_name;
}
-lldb_private::Type *
-DWARFASTParserClang::FindDefinitionTypeForDIE(const DWARFDIE &die) {
- SymbolFileDWARF *dwarf = die.GetDWARF();
- ParsedDWARFTypeAttributes attrs(die);
- bool is_forward_declaration = IsForwardDeclaration(
- die, attrs, SymbolFileDWARF::GetLanguage(*die.GetCU()));
- if (!is_forward_declaration)
- return dwarf->GetDIEToType()[die.GetDIE()];
-
- const dw_tag_t tag = die.Tag();
- TypeSP type_sp;
- Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups);
- if (log) {
- dwarf->GetObjectFile()->GetModule()->LogMessage(
- log,
- "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a "
- "forward declaration DIE, trying to find definition DIE",
- static_cast<void *>(this), die.GetOffset(), DW_TAG_value_to_name(tag),
- attrs.name.GetCString());
- }
- // We haven't parse definition die for this type, starting to search for it.
- // After we found the definition die, the GetDeclarationDIEToDefinitionDIE()
- // map will have the new mapping from this declaration die to definition die.
- if (attrs.class_language == eLanguageTypeObjC ||
- attrs.class_language == eLanguageTypeObjC_plus_plus) {
- if (!attrs.is_complete_objc_class &&
- die.Supports_DW_AT_APPLE_objc_complete_type()) {
- // We have a valid eSymbolTypeObjCClass class symbol whose name
- // matches the current objective C class that we are trying to find
- // and this DIE isn't the complete definition (we checked
- // is_complete_objc_class above and know it is false), so the real
- // definition is in here somewhere
- type_sp =
- dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true);
-
- if (!type_sp) {
- SymbolFileDWARFDebugMap *debug_map_symfile =
- dwarf->GetDebugMapSymfile();
- if (debug_map_symfile) {
- // We weren't able to find a full declaration in this DWARF,
- // see if we have a declaration anywhere else...
- type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE(
- die, attrs.name, true);
- }
- }
-
- if (type_sp && log) {
- dwarf->GetObjectFile()->GetModule()->LogMessage(
- log,
- "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is an "
- "incomplete objc type, complete type is {5:x8}",
- static_cast<void *>(this), die.GetOffset(),
- DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(),
- type_sp->GetID());
- }
- }
- }
-
- type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die);
- if (!type_sp) {
- SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile();
- if (debug_map_symfile) {
- // We weren't able to find a full declaration in this DWARF, see
- // if we have a declaration anywhere else...
- type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(die);
- }
- if (type_sp && log) {
- dwarf->GetObjectFile()->GetModule()->LogMessage(
- log,
- "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a "
- "forward declaration, complete type is {4:x8}",
- static_cast<void *>(this), die.GetOffset(), DW_TAG_value_to_name(tag),
- attrs.name.GetCString(), type_sp->GetID());
- }
- }
-
- if (!type_sp && log) {
- dwarf->GetObjectFile()->GetModule()->LogMessage(
- log,
- "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a "
- "forward declaration, unable to find definition DIE for it",
- static_cast<void *>(this), die.GetOffset(), DW_TAG_value_to_name(tag),
- attrs.name.GetCString());
- }
- return type_sp.get();
-}
-
TypeSP
DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
const DWARFDIE &die,
@@ -1767,10 +1650,14 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU());
Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups);
+ // UniqueDWARFASTType is large, so don't create a local variables on the
+ // stack, put it on the heap. This function is often called recursively and
+ // clang isn't good at sharing the stack space for variables in different
+ // blocks.
+ auto unique_ast_entry_up = std::make_unique<UniqueDWARFASTType>();
+
ConstString unique_typename(attrs.name);
Declaration unique_decl(attrs.decl);
- uint64_t byte_size = attrs.byte_size.value_or(0);
- attrs.is_forward_declaration = IsForwardDeclaration(die, attrs, cu_language);
if (attrs.name) {
if (Language::LanguageIsCPlusPlus(cu_language)) {
@@ -1783,42 +1670,14 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
unique_decl.Clear();
}
- if (UniqueDWARFASTType *unique_ast_entry_type =
- dwarf->GetUniqueDWARFASTTypeMap().Find(
- unique_typename, die, unique_decl, byte_size,
- attrs.is_forward_declaration)) {
- type_sp = unique_ast_entry_type->m_type_sp;
+ if (dwarf->GetUniqueDWARFASTTypeMap().Find(
+ unique_typename, die, unique_decl, attrs.byte_size.value_or(-1),
+ *unique_ast_entry_up)) {
+ type_sp = unique_ast_entry_up->m_type_sp;
if (type_sp) {
dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
LinkDeclContextToDIE(
- GetCachedClangDeclContextForDIE(unique_ast_entry_type->m_die), die);
- if (!attrs.is_forward_declaration) {
- // If the DIE being parsed in this function is a definition and the
- // entry in the map is a declaration, then we need to update the entry
- // to point to the definition DIE.
- if (unique_ast_entry_type->m_is_forward_declaration) {
- unique_ast_entry_type->m_die = die;
- unique_ast_entry_type->m_byte_size = byte_size;
- unique_ast_entry_type->m_declaration = unique_decl;
- unique_ast_entry_type->m_is_forward_declaration = false;
- // Need to update Type ID to refer to the definition DIE. because
- // it's used in ParseSubroutine to determine if we need to copy cxx
- // method types from a declaration DIE to this definition DIE.
- type_sp->SetID(die.GetID());
- clang_type = type_sp->GetForwardCompilerType();
- if (attrs.class_language != eLanguageTypeObjC &&
- attrs.class_language != eLanguageTypeObjC_plus_plus)
- TypeSystemClang::StartTagDeclarationDefinition(clang_type);
-
- CompilerType compiler_type_no_qualifiers =
- ClangUtil::RemoveFastQualifiers(clang_type);
- auto result = dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace(
- compiler_type_no_qualifiers.GetOpaqueQualType(),
- *die.GetDIERef());
- if (!result.second)
- result.first->second = *die.GetDIERef();
- }
- }
+ GetCachedClangDeclContextForDIE(unique_ast_entry_up->m_die), die);
return type_sp;
}
}
@@ -1840,21 +1699,125 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
default_accessibility = eAccessPrivate;
}
+ if (attrs.byte_size && *attrs.byte_size == 0 && attrs.name &&
+ !die.HasChildren() && cu_language == eLanguageTypeObjC) {
+ // Work around an issue with clang at the moment where forward
+ // declarations for objective C classes are emitted as:
+ // DW_TAG_structure_type [2]
+ // DW_AT_name( "ForwardObjcClass" )
+ // DW_AT_byte_size( 0x00 )
+ // DW_AT_decl_file( "..." )
+ // DW_AT_decl_line( 1 )
+ //
+ // Note that there is no DW_AT_declaration and there are no children,
+ // and the byte size is zero.
+ attrs.is_forward_declaration = true;
+ }
+
+ if (attrs.class_language == eLanguageTypeObjC ||
+ attrs.class_language == eLanguageTypeObjC_plus_plus) {
+ if (!attrs.is_complete_objc_class &&
+ die.Supports_DW_AT_APPLE_objc_complete_type()) {
+ // We have a valid eSymbolTypeObjCClass class symbol whose name
+ // matches the current objective C class that we are trying to find
+ // and this DIE isn't the complete definition (we checked
+ // is_complete_objc_class above and know it is false), so the real
+ // definition is in here somewhere
+ type_sp =
+ dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true);
+
+ if (!type_sp) {
+ SymbolFileDWARFDebugMap *debug_map_symfile =
+ dwarf->GetDebugMapSymfile();
+ if (debug_map_symfile) {
+ // We weren't able to find a full declaration in this DWARF,
+ // see if we have a declaration anywhere else...
+ type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE(
+ die, attrs.name, true);
+ }
+ }
+
+ if (type_sp) {
+ if (log) {
+ dwarf->GetObjectFile()->GetModule()->LogMessage(
+ log,
+ "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is an "
+ "incomplete objc type, complete type is {5:x8}",
+ static_cast<void *>(this), die.GetOffset(),
+ DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(),
+ type_sp->GetID());
+ }
+
+        // We found a real definition for this type elsewhere so let's use
+ // it and cache the fact that we found a complete type for this
+ // die
+ dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+ return type_sp;
+ }
+ }
+ }
+
if (attrs.is_forward_declaration) {
+ // We have a forward declaration to a type and we need to try and
+ // find a full declaration. We look in the current type index just in
+    // case we have a forward declaration followed by actual
+ // declarations in the DWARF. If this fails, we need to look
+ // elsewhere...
+ if (log) {
+ dwarf->GetObjectFile()->GetModule()->LogMessage(
+ log,
+ "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is a "
+ "forward declaration, trying to find complete type",
+ static_cast<void *>(this), die.GetOffset(), DW_TAG_value_to_name(tag),
+ tag, attrs.name.GetCString());
+ }
+
// See if the type comes from a Clang module and if so, track down
// that type.
type_sp = ParseTypeFromClangModule(sc, die, log);
if (type_sp)
return type_sp;
- }
+ // type_sp = FindDefinitionTypeForDIE (dwarf_cu, die,
+ // type_name_const_str);
+ type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die);
+
+ if (!type_sp) {
+ SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile();
+ if (debug_map_symfile) {
+ // We weren't able to find a full declaration in this DWARF, see
+ // if we have a declaration anywhere else...
+ type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(die);
+ }
+ }
+
+ if (type_sp) {
+ if (log) {
+ dwarf->GetObjectFile()->GetModule()->LogMessage(
+ log,
+ "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is a "
+ "forward declaration, complete type is {5:x8}",
+ static_cast<void *>(this), die.GetOffset(),
+ DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(),
+ type_sp->GetID());
+ }
+
+      // We found a real definition for this type elsewhere so let's use
+ // it and cache the fact that we found a complete type for this die
+ dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+ clang::DeclContext *defn_decl_ctx =
+ GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID()));
+ if (defn_decl_ctx)
+ LinkDeclContextToDIE(defn_decl_ctx, die);
+ return type_sp;
+ }
+ }
assert(tag_decl_kind != -1);
UNUSED_IF_ASSERT_DISABLED(tag_decl_kind);
- DWARFDIE decl_ctx_die;
- clang::DeclContext *decl_ctx =
- GetClangDeclContextContainingDIE(die, &decl_ctx_die);
+ bool clang_type_was_created = false;
+ clang::DeclContext *decl_ctx = GetClangDeclContextContainingDIE(die, nullptr);
- PrepareContextToReceiveMembers(decl_ctx, decl_ctx_die, die,
+ PrepareContextToReceiveMembers(m_ast, GetClangASTImporter(), decl_ctx, die,
attrs.name.GetCString());
if (attrs.accessibility == eAccessNone && decl_ctx) {
@@ -1893,17 +1856,20 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
tag_decl_kind, template_param_infos);
clang_type =
m_ast.CreateClassTemplateSpecializationType(class_specialization_decl);
+ clang_type_was_created = true;
m_ast.SetMetadata(class_template_decl, metadata);
m_ast.SetMetadata(class_specialization_decl, metadata);
}
- if (!clang_type) {
+ if (!clang_type_was_created) {
+ clang_type_was_created = true;
clang_type = m_ast.CreateRecordType(
decl_ctx, GetOwningClangModule(die), attrs.accessibility,
attrs.name.GetCString(), tag_decl_kind, attrs.class_language, &metadata,
attrs.exports_symbols);
}
+
// Store a forward declaration to this class type in case any
// parameters in any class methods need it for the clang types for
// function prototypes.
@@ -1914,19 +1880,13 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
Type::ResolveState::Forward,
TypePayloadClang(OptionalClangModuleID(), attrs.is_complete_objc_class));
- // UniqueDWARFASTType is large, so don't create a local variables on the
- // stack, put it on the heap. This function is often called recursively and
- // clang isn't good at sharing the stack space for variables in different
- // blocks.
- auto unique_ast_entry_up = std::make_unique<UniqueDWARFASTType>();
// Add our type to the unique type map so we don't end up creating many
// copies of the same type over and over in the ASTContext for our
// module
unique_ast_entry_up->m_type_sp = type_sp;
unique_ast_entry_up->m_die = die;
unique_ast_entry_up->m_declaration = unique_decl;
- unique_ast_entry_up->m_byte_size = byte_size;
- unique_ast_entry_up->m_is_forward_declaration = attrs.is_forward_declaration;
+ unique_ast_entry_up->m_byte_size = attrs.byte_size.value_or(0);
dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename,
*unique_ast_entry_up);
@@ -1967,7 +1927,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
GetClangASTImporter().SetRecordLayout(record_decl, layout);
}
}
- } else {
+ } else if (clang_type_was_created) {
// Start the definition if the class is not objective C since the
// underlying decls respond to isCompleteDefinition(). Objective
// C decls don't respond to isCompleteDefinition() so we can't
@@ -1979,21 +1939,26 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
if (attrs.class_language != eLanguageTypeObjC &&
attrs.class_language != eLanguageTypeObjC_plus_plus)
TypeSystemClang::StartTagDeclarationDefinition(clang_type);
+
+ // Leave this as a forward declaration until we need to know the
+ // details of the type. lldb_private::Type will automatically call
+ // the SymbolFile virtual function
+ // "SymbolFileDWARF::CompleteType(Type *)" When the definition
+ // needs to be defined.
+ assert(!dwarf->GetForwardDeclCompilerTypeToDIE().count(
+ ClangUtil::RemoveFastQualifiers(clang_type)
+ .GetOpaqueQualType()) &&
+ "Type already in the forward declaration map!");
+ // Can't assume m_ast.GetSymbolFile() is actually a
+ // SymbolFileDWARF, it can be a SymbolFileDWARFDebugMap for Apple
+ // binaries.
+ dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace(
+ ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType(),
+ *die.GetDIERef());
+ m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true);
}
}
- // If this is a declaration DIE, leave this as a forward declaration until we
- // need to know the details of the type. lldb_private::Type will automatically
- // call the SymbolFile virtual function "SymbolFileDWARF::CompleteType(Type
- // *)" When the definition needs to be defined.
- assert(!dwarf->GetForwardDeclCompilerTypeToDIE().count(
- ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType()) &&
- "Type already in the forward declaration map!");
- dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace(
- ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType(),
- *die.GetDIERef());
- m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true);
-
// If we made a clang type, set the trivial abi if applicable: We only
// do this for pass by value - which implies the Trivial ABI. There
// isn't a way to assert that something that would normally be pass by
@@ -2232,10 +2197,6 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die,
// For objective C we don't start the definition when the class is
// created.
TypeSystemClang::StartTagDeclarationDefinition(clang_type);
- } else {
- assert(clang_type.IsBeingDefined() &&
- "Trying to complete a definition without a prior call to "
- "StartTagDeclarationDefinition.");
}
AccessType default_accessibility = eAccessNone;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index 853b8cc..8d4af20 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -42,40 +42,40 @@ struct ParsedDWARFTypeAttributes;
class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser {
public:
- typedef lldb_private::plugin::dwarf::DWARFDIE DWARFDIE;
-
DWARFASTParserClang(lldb_private::TypeSystemClang &ast);
~DWARFASTParserClang() override;
// DWARFASTParser interface.
- lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die,
- bool *type_is_new_ptr) override;
+ lldb::TypeSP
+ ParseTypeFromDWARF(const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ bool *type_is_new_ptr) override;
- lldb_private::ConstString
- ConstructDemangledNameFromDWARF(const DWARFDIE &die) override;
+ lldb_private::ConstString ConstructDemangledNameFromDWARF(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
lldb_private::Function *
ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit,
- const DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
const lldb_private::AddressRange &func_range) override;
bool
- CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type,
+ CompleteTypeFromDWARF(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::Type *type,
lldb_private::CompilerType &compiler_type) override;
- lldb_private::CompilerDecl
- GetDeclForUIDFromDWARF(const DWARFDIE &die) override;
+ lldb_private::CompilerDecl GetDeclForUIDFromDWARF(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
void EnsureAllDIEsInDeclContextHaveBeenParsed(
lldb_private::CompilerDeclContext decl_context) override;
- lldb_private::CompilerDeclContext
- GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override;
+ lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
- lldb_private::CompilerDeclContext
- GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override;
+ lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
lldb_private::ClangASTImporter &GetClangASTImporter();
@@ -105,13 +105,8 @@ public:
/// \return A string, including surrounding '<>', of the template parameters.
/// If the DIE's name already has '<>', returns an empty ConstString because
/// it's assumed that the caller is using the DIE name anyway.
- lldb_private::ConstString
- GetDIEClassTemplateParams(const DWARFDIE &die) override;
-
- // Searching for definition DIE for the given DIE and return the type
- // associated with the definition DIE, or nullptr if definition DIE is not
- // found.
- lldb_private::Type *FindDefinitionTypeForDIE(const DWARFDIE &die) override;
+ lldb_private::ConstString GetDIEClassTemplateParams(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
protected:
/// Protected typedefs and members.
@@ -123,7 +118,8 @@ protected:
const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *,
clang::DeclContext *>
DIEToDeclContextMap;
- typedef std::multimap<const clang::DeclContext *, const DWARFDIE>
+ typedef std::multimap<const clang::DeclContext *,
+ const lldb_private::plugin::dwarf::DWARFDIE>
DeclContextToDIEMap;
typedef llvm::DenseMap<
const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *,
@@ -141,11 +137,14 @@ protected:
std::unique_ptr<lldb_private::ClangASTImporter> m_clang_ast_importer_up;
/// @}
- clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die);
+ clang::DeclContext *
+ GetDeclContextForBlock(const lldb_private::plugin::dwarf::DWARFDIE &die);
- clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die);
+ clang::BlockDecl *
+ ResolveBlockDIE(const lldb_private::plugin::dwarf::DWARFDIE &die);
- clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die);
+ clang::NamespaceDecl *
+ ResolveNamespaceDIE(const lldb_private::plugin::dwarf::DWARFDIE &die);
/// Returns the namespace decl that a DW_TAG_imported_declaration imports.
///
@@ -156,86 +155,96 @@ protected:
/// 'die' imports. If the imported entity is not a namespace
/// or another import declaration, returns nullptr. If an error
/// occurs, returns nullptr.
- clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die);
+ clang::NamespaceDecl *ResolveImportedDeclarationDIE(
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
- bool ParseTemplateDIE(const DWARFDIE &die,
+ bool ParseTemplateDIE(const lldb_private::plugin::dwarf::DWARFDIE &die,
lldb_private::TypeSystemClang::TemplateParameterInfos
&template_param_infos);
bool ParseTemplateParameterInfos(
- const DWARFDIE &parent_die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
lldb_private::TypeSystemClang::TemplateParameterInfos
&template_param_infos);
- std::string GetCPlusPlusQualifiedName(const DWARFDIE &die);
+ std::string
+ GetCPlusPlusQualifiedName(const lldb_private::plugin::dwarf::DWARFDIE &die);
bool ParseChildMembers(
- const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::CompilerType &class_compiler_type,
std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> &base_classes,
- std::vector<DWARFDIE> &member_function_dies,
- std::vector<DWARFDIE> &contained_type_dies,
+ std::vector<lldb_private::plugin::dwarf::DWARFDIE> &member_function_dies,
+ std::vector<lldb_private::plugin::dwarf::DWARFDIE> &contained_type_dies,
DelayedPropertyList &delayed_properties,
const lldb::AccessType default_accessibility,
lldb_private::ClangASTImporter::LayoutInfo &layout_info);
size_t
ParseChildParameters(clang::DeclContext *containing_decl_ctx,
- const DWARFDIE &parent_die, bool skip_artificial,
- bool &is_static, bool &is_variadic,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
+ bool skip_artificial, bool &is_static, bool &is_variadic,
bool &has_template_params,
std::vector<lldb_private::CompilerType> &function_args,
std::vector<clang::ParmVarDecl *> &function_param_decls,
unsigned &type_quals);
- size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type,
- bool is_signed, uint32_t enumerator_byte_size,
- const DWARFDIE &parent_die);
+ size_t ParseChildEnumerators(
+ lldb_private::CompilerType &compiler_type, bool is_signed,
+ uint32_t enumerator_byte_size,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die);
/// Parse a structure, class, or union type DIE.
- lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die,
- ParsedDWARFTypeAttributes &attrs);
+ lldb::TypeSP
+ ParseStructureLikeDIE(const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ ParsedDWARFTypeAttributes &attrs);
- clang::Decl *GetClangDeclForDIE(const DWARFDIE &die);
+ clang::Decl *
+ GetClangDeclForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die);
- clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die);
+ clang::DeclContext *
+ GetClangDeclContextForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die);
- clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die,
- DWARFDIE *decl_ctx_die);
- lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die);
+ clang::DeclContext *GetClangDeclContextContainingDIE(
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::plugin::dwarf::DWARFDIE *decl_ctx_die);
+ lldb_private::OptionalClangModuleID
+ GetOwningClangModule(const lldb_private::plugin::dwarf::DWARFDIE &die);
- bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die,
- const DWARFDIE &dst_class_die,
- lldb_private::Type *class_type,
- std::vector<DWARFDIE> &failures);
+ bool CopyUniqueClassMethodTypes(
+ const lldb_private::plugin::dwarf::DWARFDIE &src_class_die,
+ const lldb_private::plugin::dwarf::DWARFDIE &dst_class_die,
+ lldb_private::Type *class_type,
+ std::vector<lldb_private::plugin::dwarf::DWARFDIE> &failures);
- clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die);
+ clang::DeclContext *GetCachedClangDeclContextForDIE(
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
- void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die);
+ void LinkDeclContextToDIE(clang::DeclContext *decl_ctx,
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
- void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die);
+ void LinkDeclToDIE(clang::Decl *decl,
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
/// If \p type_sp is valid, calculate and set its symbol context scope, and
/// update the type list for its backing symbol file.
///
/// Returns \p type_sp.
- lldb::TypeSP
- UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die, lldb::TypeSP type_sp);
+ lldb::TypeSP UpdateSymbolContextScopeForType(
+ const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die, lldb::TypeSP type_sp);
/// Follow Clang Module Skeleton CU references to find a type definition.
- lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die,
- lldb_private::Log *log);
+ lldb::TypeSP
+ ParseTypeFromClangModule(const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::Log *log);
// Return true if this type is a declaration to a type in an external
// module.
- lldb::ModuleSP GetModuleForType(const DWARFDIE &die);
-
- void PrepareContextToReceiveMembers(clang::DeclContext *decl_ctx,
- const DWARFDIE &decl_ctx_die,
- const DWARFDIE &die,
- const char *type_name_cstr);
+ lldb::ModuleSP
+ GetModuleForType(const lldb_private::plugin::dwarf::DWARFDIE &die);
static bool classof(const DWARFASTParser *Parser) {
return Parser->GetKind() == Kind::DWARFASTParserClang;
@@ -265,8 +274,10 @@ private:
/// Parsed form of all attributes that are relevant for parsing type members.
struct MemberAttributes {
- explicit MemberAttributes(const DWARFDIE &die, const DWARFDIE &parent_die,
- lldb::ModuleSP module_sp);
+ explicit MemberAttributes(
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
+ lldb::ModuleSP module_sp);
const char *name = nullptr;
/// Indicates how many bits into the word (according to the host endianness)
/// the low-order bit of the field starts. Can be negative.
@@ -313,12 +324,15 @@ private:
/// created property.
/// \param delayed_properties The list of delayed properties that the result
/// will be appended to.
- void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die,
- const lldb_private::CompilerType &class_clang_type,
- DelayedPropertyList &delayed_properties);
+ void
+ ParseObjCProperty(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
+ const lldb_private::CompilerType &class_clang_type,
+ DelayedPropertyList &delayed_properties);
void
- ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die,
+ ParseSingleMember(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
const lldb_private::CompilerType &class_clang_type,
lldb::AccessType default_accessibility,
lldb_private::ClangASTImporter::LayoutInfo &layout_info,
@@ -336,25 +350,31 @@ private:
/// \param[in] class_clang_type The parent RecordType of the static
/// member this function will create.
void CreateStaticMemberVariable(
- const DWARFDIE &die, const MemberAttributes &attrs,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const MemberAttributes &attrs,
const lldb_private::CompilerType &class_clang_type);
- bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type,
+ bool CompleteRecordType(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::Type *type,
lldb_private::CompilerType &clang_type);
- bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type,
+ bool CompleteEnumType(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::Type *type,
lldb_private::CompilerType &clang_type);
- lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die,
- ParsedDWARFTypeAttributes &attrs);
+ lldb::TypeSP
+ ParseTypeModifier(const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ ParsedDWARFTypeAttributes &attrs);
lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs);
- lldb::TypeSP ParseSubroutine(const DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ ParsedDWARFTypeAttributes &attrs);
+ lldb::TypeSP ParseSubroutine(const lldb_private::plugin::dwarf::DWARFDIE &die,
const ParsedDWARFTypeAttributes &attrs);
- lldb::TypeSP ParseArrayType(const DWARFDIE &die,
+ lldb::TypeSP ParseArrayType(const lldb_private::plugin::dwarf::DWARFDIE &die,
const ParsedDWARFTypeAttributes &attrs);
- lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die,
- const ParsedDWARFTypeAttributes &attrs);
+ lldb::TypeSP
+ ParsePointerToMemberType(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const ParsedDWARFTypeAttributes &attrs);
/// Parses a DW_TAG_inheritance DIE into a base/super class.
///
@@ -371,7 +391,8 @@ private:
/// \param layout_info The layout information that will be updated for C++
/// base classes with the base offset.
void ParseInheritance(
- const DWARFDIE &die, const DWARFDIE &parent_die,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
const lldb_private::CompilerType class_clang_type,
const lldb::AccessType default_accessibility,
const lldb::ModuleSP &module_sp,
@@ -388,7 +409,8 @@ private:
/// \param layout_info The layout information that will be updated for
// base classes with the base offset
void
- ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die,
+ ParseRustVariantPart(lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
lldb_private::CompilerType &class_clang_type,
const lldb::AccessType default_accesibility,
lldb_private::ClangASTImporter::LayoutInfo &layout_info);
@@ -398,9 +420,8 @@ private:
/// Some attributes are relevant for all kinds of types (declaration), while
/// others are only meaningful to a specific type (is_virtual)
struct ParsedDWARFTypeAttributes {
- typedef lldb_private::plugin::dwarf::DWARFDIE DWARFDIE;
-
- explicit ParsedDWARFTypeAttributes(const DWARFDIE &die);
+ explicit ParsedDWARFTypeAttributes(
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
lldb::AccessType accessibility = lldb::eAccessNone;
bool is_artificial = false;
@@ -417,7 +438,7 @@ struct ParsedDWARFTypeAttributes {
const char *mangled_name = nullptr;
lldb_private::ConstString name;
lldb_private::Declaration decl;
- DWARFDIE object_pointer;
+ lldb_private::plugin::dwarf::DWARFDIE object_pointer;
lldb_private::plugin::dwarf::DWARFFormValue abstract_origin;
lldb_private::plugin::dwarf::DWARFFormValue containing_type;
lldb_private::plugin::dwarf::DWARFFormValue signature;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
index 6330470..90e42be 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
@@ -85,11 +85,6 @@ bool DebugNamesDWARFIndex::ProcessEntry(
DWARFDIE die = GetDIE(entry);
if (!die)
return true;
- // Clang erroneously emits index entries for declaration DIEs in case when the
- // definition is in a type unit (llvm.org/pr77696). Weed those out.
- if (die.IsStructUnionOrClass() &&
- die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0))
- return true;
return callback(die);
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 661e4a7..a52a7d6 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -481,13 +481,6 @@ static ConstString GetDWARFMachOSegmentName() {
return g_dwarf_section_name;
}
-llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
-SymbolFileDWARF::GetForwardDeclCompilerTypeToDIE() {
- if (SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile())
- return debug_map_symfile->GetForwardDeclCompilerTypeToDIE();
- return m_forward_decl_compiler_type_to_die;
-}
-
UniqueDWARFASTTypeMap &SymbolFileDWARF::GetUniqueDWARFASTTypeMap() {
SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile();
if (debug_map_symfile)
@@ -1639,33 +1632,27 @@ bool SymbolFileDWARF::CompleteType(CompilerType &compiler_type) {
return true;
}
- // Once we start resolving this type, remove it from the forward
- // declaration map in case anyone's child members or other types require this
- // type to get resolved.
- DWARFDIE dwarf_die = GetDIE(die_it->second);
- GetForwardDeclCompilerTypeToDIE().erase(die_it);
- Type *type = nullptr;
- if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU()))
- type = dwarf_ast->FindDefinitionTypeForDIE(dwarf_die);
- if (!type)
- return false;
-
- die_it = GetForwardDeclCompilerTypeToDIE().find(
- compiler_type_no_qualifiers.GetOpaqueQualType());
- if (die_it != GetForwardDeclCompilerTypeToDIE().end()) {
- dwarf_die = GetDIE(die_it->getSecond());
+ DWARFDIE dwarf_die = GetDIE(die_it->getSecond());
+ if (dwarf_die) {
+ // Once we start resolving this type, remove it from the forward
+    // declaration map in case anyone's child members or other types require this
+ // type to get resolved. The type will get resolved when all of the calls
+ // to SymbolFileDWARF::ResolveClangOpaqueTypeDefinition are done.
GetForwardDeclCompilerTypeToDIE().erase(die_it);
- }
- if (Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion))
- GetObjectFile()->GetModule()->LogMessageVerboseBacktrace(
- log, "{0:x8}: {1} ({2}) '{3}' resolving forward declaration...",
- dwarf_die.GetID(), DW_TAG_value_to_name(dwarf_die.Tag()),
- dwarf_die.Tag(), type->GetName().AsCString());
- assert(compiler_type);
- if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU()))
- return dwarf_ast->CompleteTypeFromDWARF(dwarf_die, type, compiler_type);
- return true;
+ Type *type = GetDIEToType().lookup(dwarf_die.GetDIE());
+
+ Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion);
+ if (log)
+ GetObjectFile()->GetModule()->LogMessageVerboseBacktrace(
+ log, "{0:x8}: {1} ({2}) '{3}' resolving forward declaration...",
+ dwarf_die.GetID(), DW_TAG_value_to_name(dwarf_die.Tag()),
+ dwarf_die.Tag(), type->GetName().AsCString());
+ assert(compiler_type);
+ if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU()))
+ return dwarf_ast->CompleteTypeFromDWARF(dwarf_die, type, compiler_type);
+ }
+ return false;
}
Type *SymbolFileDWARF::ResolveType(const DWARFDIE &die,
@@ -2105,16 +2092,14 @@ SymbolFileDWARF::GlobalVariableMap &SymbolFileDWARF::GetGlobalAranges() {
if (var_sp && !var_sp->GetLocationIsConstantValueData()) {
const DWARFExpressionList &location =
var_sp->LocationExpressionList();
- Value location_result;
- Status error;
ExecutionContext exe_ctx;
- if (location.Evaluate(&exe_ctx, nullptr, LLDB_INVALID_ADDRESS,
- nullptr, nullptr, location_result,
- &error)) {
- if (location_result.GetValueType() ==
+ llvm::Expected<Value> location_result = location.Evaluate(
+ &exe_ctx, nullptr, LLDB_INVALID_ADDRESS, nullptr, nullptr);
+ if (location_result) {
+ if (location_result->GetValueType() ==
Value::ValueType::FileAddress) {
lldb::addr_t file_addr =
- location_result.GetScalar().ULongLong();
+ location_result->GetScalar().ULongLong();
lldb::addr_t byte_size = 1;
if (var_sp->GetType())
byte_size =
@@ -2122,6 +2107,10 @@ SymbolFileDWARF::GlobalVariableMap &SymbolFileDWARF::GetGlobalAranges() {
m_global_aranges_up->Append(GlobalVariableMap::Entry(
file_addr, byte_size, var_sp.get()));
}
+ } else {
+ LLDB_LOG_ERROR(GetLog(LLDBLog::Symbols),
+ location_result.takeError(),
+ "location expression failed to execute: {0}");
}
}
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
index 35893f2..7282c08 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
@@ -335,8 +335,12 @@ public:
virtual DIEToTypePtr &GetDIEToType() { return m_die_to_type; }
- virtual llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
- GetForwardDeclCompilerTypeToDIE();
+ typedef llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef>
+ CompilerTypeToDIE;
+
+ virtual CompilerTypeToDIE &GetForwardDeclCompilerTypeToDIE() {
+ return m_forward_decl_compiler_type_to_die;
+ }
typedef llvm::DenseMap<const DWARFDebugInfoEntry *, lldb::VariableSP>
DIEToVariableSP;
@@ -529,14 +533,9 @@ protected:
NameToOffsetMap m_function_scope_qualified_name_map;
std::unique_ptr<DWARFDebugRanges> m_ranges;
UniqueDWARFASTTypeMap m_unique_ast_type_map;
- // A map from DIE to lldb_private::Type. For record type, the key might be
- // either declaration DIE or definition DIE.
DIEToTypePtr m_die_to_type;
DIEToVariableSP m_die_to_variable_sp;
- // A map from CompilerType to the struct/class/union/enum DIE (might be a
- // declaration or a definition) that is used to construct it.
- llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef>
- m_forward_decl_compiler_type_to_die;
+ CompilerTypeToDIE m_forward_decl_compiler_type_to_die;
llvm::DenseMap<dw_offset_t, std::unique_ptr<SupportFileList>>
m_type_unit_support_files;
std::vector<uint32_t> m_lldb_cu_to_dwarf_unit;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
index d7d5719..de22dd6 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
@@ -284,11 +284,6 @@ protected:
lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE(
const DWARFDIE &die, ConstString type_name, bool must_be_implementation);
- llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
- GetForwardDeclCompilerTypeToDIE() {
- return m_forward_decl_compiler_type_to_die;
- }
-
UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() {
return m_unique_ast_type_map;
}
@@ -326,10 +321,6 @@ protected:
std::vector<uint32_t> m_func_indexes; // Sorted by address
std::vector<uint32_t> m_glob_indexes;
std::map<std::pair<ConstString, llvm::sys::TimePoint<>>, OSOInfoSP> m_oso_map;
- // A map from CompilerType to the struct/class/union/enum DIE (might be a
- // declaration or a definition) that is used to construct it.
- llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef>
- m_forward_decl_compiler_type_to_die;
UniqueDWARFASTTypeMap m_unique_ast_type_map;
LazyBool m_supports_DW_AT_APPLE_objc_complete_type;
DebugMap m_debug_map;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
index e4db39c..71c9997 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
@@ -110,7 +110,7 @@ SymbolFileDWARF::DIEToVariableSP &SymbolFileDWARFDwo::GetDIEToVariable() {
return GetBaseSymbolFile().GetDIEToVariable();
}
-llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
+SymbolFileDWARF::CompilerTypeToDIE &
SymbolFileDWARFDwo::GetForwardDeclCompilerTypeToDIE() {
return GetBaseSymbolFile().GetForwardDeclCompilerTypeToDIE();
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
index 2f0ac41..1500540 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
@@ -72,8 +72,7 @@ protected:
DIEToVariableSP &GetDIEToVariable() override;
- llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
- GetForwardDeclCompilerTypeToDIE() override;
+ CompilerTypeToDIE &GetForwardDeclCompilerTypeToDIE() override;
UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() override;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
index 3d201e9..223518f 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
@@ -13,75 +13,66 @@
using namespace lldb_private::dwarf;
using namespace lldb_private::plugin::dwarf;
-static bool IsStructOrClassTag(llvm::dwarf::Tag Tag) {
- return Tag == llvm::dwarf::Tag::DW_TAG_class_type ||
- Tag == llvm::dwarf::Tag::DW_TAG_structure_type;
-}
-
-UniqueDWARFASTType *UniqueDWARFASTTypeList::Find(
- const DWARFDIE &die, const lldb_private::Declaration &decl,
- const int32_t byte_size, bool is_forward_declaration) {
- for (UniqueDWARFASTType &udt : m_collection) {
+bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die,
+ const lldb_private::Declaration &decl,
+ const int32_t byte_size,
+ UniqueDWARFASTType &entry) const {
+ for (const UniqueDWARFASTType &udt : m_collection) {
// Make sure the tags match
- if (udt.m_die.Tag() == die.Tag() || (IsStructOrClassTag(udt.m_die.Tag()) &&
- IsStructOrClassTag(die.Tag()))) {
- // If they are not both definition DIEs or both declaration DIEs, then
- // don't check for byte size and declaration location, because declaration
- // DIEs usually don't have those info.
- bool matching_size_declaration =
- udt.m_is_forward_declaration != is_forward_declaration
- ? true
- : (udt.m_byte_size < 0 || byte_size < 0 ||
- udt.m_byte_size == byte_size) &&
- udt.m_declaration == decl;
- if (!matching_size_declaration)
- continue;
- // The type has the same name, and was defined on the same file and
- // line. Now verify all of the parent DIEs match.
- DWARFDIE parent_arg_die = die.GetParent();
- DWARFDIE parent_pos_die = udt.m_die.GetParent();
- bool match = true;
- bool done = false;
- while (!done && match && parent_arg_die && parent_pos_die) {
- const dw_tag_t parent_arg_tag = parent_arg_die.Tag();
- const dw_tag_t parent_pos_tag = parent_pos_die.Tag();
- if (parent_arg_tag == parent_pos_tag ||
- (IsStructOrClassTag(parent_arg_tag) &&
- IsStructOrClassTag(parent_pos_tag))) {
- switch (parent_arg_tag) {
- case DW_TAG_class_type:
- case DW_TAG_structure_type:
- case DW_TAG_union_type:
- case DW_TAG_namespace: {
- const char *parent_arg_die_name = parent_arg_die.GetName();
- if (parent_arg_die_name == nullptr) {
- // Anonymous (i.e. no-name) struct
- match = false;
- } else {
- const char *parent_pos_die_name = parent_pos_die.GetName();
- if (parent_pos_die_name == nullptr ||
- ((parent_arg_die_name != parent_pos_die_name) &&
- strcmp(parent_arg_die_name, parent_pos_die_name)))
- match = false;
+ if (udt.m_die.Tag() == die.Tag()) {
+ // Validate byte sizes of both types only if both are valid.
+ if (udt.m_byte_size < 0 || byte_size < 0 ||
+ udt.m_byte_size == byte_size) {
+ // Make sure the file and line match
+ if (udt.m_declaration == decl) {
+ // The type has the same name, and was defined on the same file and
+ // line. Now verify all of the parent DIEs match.
+ DWARFDIE parent_arg_die = die.GetParent();
+ DWARFDIE parent_pos_die = udt.m_die.GetParent();
+ bool match = true;
+ bool done = false;
+ while (!done && match && parent_arg_die && parent_pos_die) {
+ const dw_tag_t parent_arg_tag = parent_arg_die.Tag();
+ const dw_tag_t parent_pos_tag = parent_pos_die.Tag();
+ if (parent_arg_tag == parent_pos_tag) {
+ switch (parent_arg_tag) {
+ case DW_TAG_class_type:
+ case DW_TAG_structure_type:
+ case DW_TAG_union_type:
+ case DW_TAG_namespace: {
+ const char *parent_arg_die_name = parent_arg_die.GetName();
+ if (parent_arg_die_name ==
+ nullptr) // Anonymous (i.e. no-name) struct
+ {
+ match = false;
+ } else {
+ const char *parent_pos_die_name = parent_pos_die.GetName();
+ if (parent_pos_die_name == nullptr ||
+ ((parent_arg_die_name != parent_pos_die_name) &&
+ strcmp(parent_arg_die_name, parent_pos_die_name)))
+ match = false;
+ }
+ } break;
+
+ case DW_TAG_compile_unit:
+ case DW_TAG_partial_unit:
+ done = true;
+ break;
+ default:
+ break;
+ }
}
- } break;
+ parent_arg_die = parent_arg_die.GetParent();
+ parent_pos_die = parent_pos_die.GetParent();
+ }
- case DW_TAG_compile_unit:
- case DW_TAG_partial_unit:
- done = true;
- break;
- default:
- break;
+ if (match) {
+ entry = udt;
+ return true;
}
}
- parent_arg_die = parent_arg_die.GetParent();
- parent_pos_die = parent_pos_die.GetParent();
- }
-
- if (match) {
- return &udt;
}
}
}
- return nullptr;
+ return false;
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h
index 29e5c02..bf3cbae 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h
@@ -23,19 +23,31 @@ public:
// Constructors and Destructors
UniqueDWARFASTType() : m_type_sp(), m_die(), m_declaration() {}
+ UniqueDWARFASTType(lldb::TypeSP &type_sp, const DWARFDIE &die,
+ const Declaration &decl, int32_t byte_size)
+ : m_type_sp(type_sp), m_die(die), m_declaration(decl),
+ m_byte_size(byte_size) {}
+
UniqueDWARFASTType(const UniqueDWARFASTType &rhs)
: m_type_sp(rhs.m_type_sp), m_die(rhs.m_die),
- m_declaration(rhs.m_declaration), m_byte_size(rhs.m_byte_size),
- m_is_forward_declaration(rhs.m_is_forward_declaration) {}
+ m_declaration(rhs.m_declaration), m_byte_size(rhs.m_byte_size) {}
~UniqueDWARFASTType() = default;
+ UniqueDWARFASTType &operator=(const UniqueDWARFASTType &rhs) {
+ if (this != &rhs) {
+ m_type_sp = rhs.m_type_sp;
+ m_die = rhs.m_die;
+ m_declaration = rhs.m_declaration;
+ m_byte_size = rhs.m_byte_size;
+ }
+ return *this;
+ }
+
lldb::TypeSP m_type_sp;
DWARFDIE m_die;
Declaration m_declaration;
int32_t m_byte_size = -1;
- // True if the m_die is a forward declaration DIE.
- bool m_is_forward_declaration = true;
};
class UniqueDWARFASTTypeList {
@@ -50,9 +62,8 @@ public:
m_collection.push_back(entry);
}
- UniqueDWARFASTType *Find(const DWARFDIE &die, const Declaration &decl,
- const int32_t byte_size,
- bool is_forward_declaration);
+ bool Find(const DWARFDIE &die, const Declaration &decl,
+ const int32_t byte_size, UniqueDWARFASTType &entry) const;
protected:
typedef std::vector<UniqueDWARFASTType> collection;
@@ -69,15 +80,14 @@ public:
m_collection[name.GetCString()].Append(entry);
}
- UniqueDWARFASTType *Find(ConstString name, const DWARFDIE &die,
- const Declaration &decl, const int32_t byte_size,
- bool is_forward_declaration) {
+ bool Find(ConstString name, const DWARFDIE &die, const Declaration &decl,
+ const int32_t byte_size, UniqueDWARFASTType &entry) const {
const char *unique_name_cstr = name.GetCString();
- collection::iterator pos = m_collection.find(unique_name_cstr);
+ collection::const_iterator pos = m_collection.find(unique_name_cstr);
if (pos != m_collection.end()) {
- return pos->second.Find(die, decl, byte_size, is_forward_declaration);
+ return pos->second.Find(die, decl, byte_size, entry);
}
- return nullptr;
+ return false;
}
protected:
diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp
index 194f89b..96d8322 100644
--- a/lldb/source/Symbol/Function.cpp
+++ b/lldb/source/Symbol/Function.cpp
@@ -220,17 +220,18 @@ Function *IndirectCallEdge::GetCallee(ModuleList &images,
ExecutionContext &exe_ctx) {
Log *log = GetLog(LLDBLog::Step);
Status error;
- Value callee_addr_val;
- if (!call_target.Evaluate(
- &exe_ctx, exe_ctx.GetRegisterContext(), LLDB_INVALID_ADDRESS,
- /*initial_value_ptr=*/nullptr,
- /*object_address_ptr=*/nullptr, callee_addr_val, &error)) {
- LLDB_LOGF(log, "IndirectCallEdge: Could not evaluate expression: %s",
- error.AsCString());
+ llvm::Expected<Value> callee_addr_val = call_target.Evaluate(
+ &exe_ctx, exe_ctx.GetRegisterContext(), LLDB_INVALID_ADDRESS,
+ /*initial_value_ptr=*/nullptr,
+ /*object_address_ptr=*/nullptr);
+ if (!callee_addr_val) {
+ LLDB_LOG_ERROR(log, callee_addr_val.takeError(),
+ "IndirectCallEdge: Could not evaluate expression: {0}");
return nullptr;
}
- addr_t raw_addr = callee_addr_val.GetScalar().ULongLong(LLDB_INVALID_ADDRESS);
+ addr_t raw_addr =
+ callee_addr_val->GetScalar().ULongLong(LLDB_INVALID_ADDRESS);
if (raw_addr == LLDB_INVALID_ADDRESS) {
LLDB_LOG(log, "IndirectCallEdge: Could not extract address from scalar");
return nullptr;
diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp
index e2d712c..95e8abd 100644
--- a/lldb/source/Target/RegisterContextUnwind.cpp
+++ b/lldb/source/Target/RegisterContextUnwind.cpp
@@ -1661,12 +1661,14 @@ RegisterContextUnwind::SavedLocationForRegister(
unwindplan_registerkind);
Value cfa_val = Scalar(m_cfa);
cfa_val.SetValueType(Value::ValueType::LoadAddress);
- Value result;
- Status error;
- if (dwarfexpr.Evaluate(&exe_ctx, this, 0, &cfa_val, nullptr, result,
- &error)) {
+ llvm::Expected<Value> result =
+ dwarfexpr.Evaluate(&exe_ctx, this, 0, &cfa_val, nullptr);
+ if (!result) {
+ LLDB_LOG_ERROR(log, result.takeError(),
+ "DWARF expression failed to evaluate: {0}");
+ } else {
addr_t val;
- val = result.GetScalar().ULongLong();
+ val = result->GetScalar().ULongLong();
if (unwindplan_regloc.IsDWARFExpression()) {
regloc.type = UnwindLLDB::RegisterLocation::eRegisterValueInferred;
regloc.location.inferred_value = val;
@@ -2029,11 +2031,10 @@ bool RegisterContextUnwind::ReadFrameAddress(
DWARFExpressionList dwarfexpr(opcode_ctx, dwarfdata, nullptr);
dwarfexpr.GetMutableExpressionAtAddress()->SetRegisterKind(
row_register_kind);
- Value result;
- Status error;
- if (dwarfexpr.Evaluate(&exe_ctx, this, 0, nullptr, nullptr, result,
- &error)) {
- address = result.GetScalar().ULongLong();
+ llvm::Expected<Value> result =
+ dwarfexpr.Evaluate(&exe_ctx, this, 0, nullptr, nullptr);
+ if (result) {
+ address = result->GetScalar().ULongLong();
if (ABISP abi_sp = m_thread.GetProcess()->GetABI())
address = abi_sp->FixCodeAddress(address);
@@ -2042,7 +2043,7 @@ bool RegisterContextUnwind::ReadFrameAddress(
return true;
}
UnwindLogMsg("Failed to set CFA value via DWARF expression: %s",
- error.AsCString());
+ llvm::toString(result.takeError()).c_str());
break;
}
case UnwindPlan::Row::FAValue::isRaSearch: {
diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp
index 246871d..3a2b4d0 100644
--- a/lldb/source/Target/StackFrame.cpp
+++ b/lldb/source/Target/StackFrame.cpp
@@ -1091,24 +1091,19 @@ bool StackFrame::GetFrameBaseValue(Scalar &frame_base, Status *error_ptr) {
m_flags.Set(GOT_FRAME_BASE);
ExecutionContext exe_ctx(shared_from_this());
- Value expr_value;
addr_t loclist_base_addr = LLDB_INVALID_ADDRESS;
if (!m_sc.function->GetFrameBaseExpression().IsAlwaysValidSingleExpr())
loclist_base_addr =
m_sc.function->GetAddressRange().GetBaseAddress().GetLoadAddress(
exe_ctx.GetTargetPtr());
- if (!m_sc.function->GetFrameBaseExpression().Evaluate(
- &exe_ctx, nullptr, loclist_base_addr, nullptr, nullptr,
- expr_value, &m_frame_base_error)) {
- // We should really have an error if evaluate returns, but in case we
- // don't, lets set the error to something at least.
- if (m_frame_base_error.Success())
- m_frame_base_error.SetErrorString(
- "Evaluation of the frame base expression failed.");
- } else {
- m_frame_base = expr_value.ResolveValue(&exe_ctx);
- }
+ llvm::Expected<Value> expr_value =
+ m_sc.function->GetFrameBaseExpression().Evaluate(
+ &exe_ctx, nullptr, loclist_base_addr, nullptr, nullptr);
+ if (!expr_value)
+ m_frame_base_error = expr_value.takeError();
+ else
+ m_frame_base = expr_value->ResolveValue(&exe_ctx);
} else {
m_frame_base_error.SetErrorString("No function in symbol context.");
}
diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp
index be08485..2a53000 100644
--- a/lldb/source/Target/Statistics.cpp
+++ b/lldb/source/Target/Statistics.cpp
@@ -355,14 +355,14 @@ llvm::json::Value DebuggerStats::ReportStatistics(
}
global_stats.try_emplace("targets", std::move(json_targets));
+ ConstStringStats const_string_stats;
+ json::Object json_memory{
+ {"strings", const_string_stats.ToJSON()},
+ };
+ global_stats.try_emplace("memory", std::move(json_memory));
if (!summary_only) {
- ConstStringStats const_string_stats;
- json::Object json_memory{
- {"strings", const_string_stats.ToJSON()},
- };
json::Value cmd_stats = debugger.GetCommandInterpreter().GetStatistics();
global_stats.try_emplace("modules", std::move(json_modules));
- global_stats.try_emplace("memory", std::move(json_memory));
global_stats.try_emplace("commands", std::move(cmd_stats));
}
diff --git a/lldb/source/Target/ThreadPlanStepOverRange.cpp b/lldb/source/Target/ThreadPlanStepOverRange.cpp
index 84f282f..3fe02e0 100644
--- a/lldb/source/Target/ThreadPlanStepOverRange.cpp
+++ b/lldb/source/Target/ThreadPlanStepOverRange.cpp
@@ -355,7 +355,7 @@ bool ThreadPlanStepOverRange::DoPlanExplainsStop(Event *event_ptr) {
return_value = NextRangeBreakpointExplainsStop(stop_info_sp);
} else {
if (log)
- log->PutCString("ThreadPlanStepInRange got asked if it explains the "
+ log->PutCString("ThreadPlanStepOverRange got asked if it explains the "
"stop for some reason other than step.");
return_value = false;
}
diff --git a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp
index 91769e8..7ad72b4 100644
--- a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp
+++ b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp
@@ -52,9 +52,10 @@ __attribute__((noinline)) void func4_amb(int &sink, int x) {
//% expect_cmd_failure=True)
//% self.filecheck("expr sink", "main.cpp","-check-prefix=FUNC4-EXPR",
//% expect_cmd_failure=True)
- // FUNC4-EXPR-FAIL: couldn't get the value of variable x: Could not evaluate
- // DW_OP_entry_value. FUNC4-EXPR: couldn't get the value of variable sink:
- // Could not evaluate DW_OP_entry_value.
+ // clang-format off
+ // FUNC4-EXPR-FAIL: couldn't get the value of variable x: could not evaluate DW_OP_entry_value: no matching call site param found
+ // FUNC4-EXPR: couldn't get the value of variable sink: could not evaluate DW_OP_entry_value: no matching call site param found
+ // clang-format on
}
__attribute__((noinline)) void func5_amb() {}
diff --git a/lldb/test/API/lang/c/inlines/Makefile b/lldb/test/API/lang/c/inlines/Makefile
new file mode 100644
index 0000000..f9555f9
--- /dev/null
+++ b/lldb/test/API/lang/c/inlines/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES := main.c
+
+include Makefile.rules
diff --git a/lldb/test/API/lang/c/inlines/TestRedefinitionsInInlines.py b/lldb/test/API/lang/c/inlines/TestRedefinitionsInInlines.py
index 024b9da..062fd88 100644
--- a/lldb/test/API/lang/c/inlines/TestRedefinitionsInInlines.py
+++ b/lldb/test/API/lang/c/inlines/TestRedefinitionsInInlines.py
@@ -1,14 +1,60 @@
-from lldbsuite.test import lldbinline
-from lldbsuite.test import decorators
-
-lldbinline.MakeInlineTest(
- __file__,
- globals(),
- [
- decorators.expectedFailureAll(
- compiler="clang",
- compiler_version=["<", "3.5"],
- bugnumber="llvm.org/pr27845",
+"""Test that inlined argument variables have their correct location in debuginfo"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestRedefinitionsInInlines(TestBase):
+ # https://github.com/llvm/llvm-project/issues/28219
+ @skipIf(compiler="clang", compiler_version=["<", "3.5"])
+ def test(self):
+ self.source = "main.c"
+ self.build()
+ (target, process, thread, bp1) = lldbutil.run_to_source_breakpoint(
+ self, "first breakpoint", lldb.SBFileSpec(self.source, False)
+ )
+
+ bp2 = target.BreakpointCreateBySourceRegex(
+ "second breakpoint", lldb.SBFileSpec(self.source, False)
+ )
+ bp3 = target.BreakpointCreateBySourceRegex(
+ "third breakpoint", lldb.SBFileSpec(self.source, False)
)
- ],
-)
+
+        # When called from main(), test2 receives the value 42 in its 'b' argument.
+ self.expect("expression b", DATA_TYPES_DISPLAYED_CORRECTLY, substrs=["42"])
+
+ process.Continue()
+
+ self.assertState(process.GetState(), lldb.eStateStopped)
+ thread = lldbutil.get_stopped_thread(process, lldb.eStopReasonBreakpoint)
+ self.assertIsNotNone(thread)
+ bp_id = thread.GetStopReasonDataAtIndex(0)
+ self.assertEqual(bp_id, bp2.GetID())
+
+ self.expect("expression b", DATA_TYPES_DISPLAYED_CORRECTLY, substrs=["42"])
+ self.expect("expression c", DATA_TYPES_DISPLAYED_CORRECTLY, substrs=["84"])
+
+ process.Continue()
+
+        # Now we're in test1(), and the first thing it does is call test2(24).
+        # Step into test2 and check that the argument 'b' has the value 24.
+ self.assertState(process.GetState(), lldb.eStateStopped)
+ thread = lldbutil.get_stopped_thread(process, lldb.eStopReasonBreakpoint)
+ self.assertIsNotNone(thread)
+ bp_id = thread.GetStopReasonDataAtIndex(0)
+ self.assertEqual(bp_id, bp3.GetID())
+
+ frame = thread.GetFrameAtIndex(0)
+ self.assertTrue(frame.IsInlined())
+ self.assertEqual(frame.GetFunctionName(), "test1")
+
+ thread.StepInto()
+
+ frame = thread.GetFrameAtIndex(0)
+ self.assertTrue(frame.IsInlined())
+ self.assertEqual(frame.GetFunctionName(), "test2")
+
+ self.expect("expression b", DATA_TYPES_DISPLAYED_CORRECTLY, substrs=["24"])
diff --git a/lldb/test/API/lang/c/inlines/main.c b/lldb/test/API/lang/c/inlines/main.c
index 8fe4918..6ecc04d 100644
--- a/lldb/test/API/lang/c/inlines/main.c
+++ b/lldb/test/API/lang/c/inlines/main.c
@@ -3,23 +3,22 @@
inline void test1(int) __attribute__ ((always_inline));
inline void test2(int) __attribute__ ((always_inline));
+// Called once from main with b==42 then called from test1 with b==24.
void test2(int b) {
- printf("test2(%d)\n", b); //% self.expect("expression b", DATA_TYPES_DISPLAYED_CORRECTLY, substrs = ["42"])
- {
- int c = b * 2;
- printf("c=%d\n", c); //% self.expect("expression b", DATA_TYPES_DISPLAYED_CORRECTLY, substrs = ["42"])
- //% self.expect("expression c", DATA_TYPES_DISPLAYED_CORRECTLY, substrs = ["84"])
- }
+ printf("test2(%d)\n", b); // first breakpoint
+ {
+ int c = b * 2;
+ printf("c=%d\n", c); // second breakpoint
+ }
}
void test1(int a) {
printf("test1(%d)\n", a);
- test2(a+1);//% self.runCmd("step")
- //% self.expect("expression b", DATA_TYPES_DISPLAYED_CORRECTLY, substrs = ["24"])
+ test2(a + 1); // third breakpoint
}
-int main() {
- test2(42);
- test1(23);
- return 0;
+int main(int argc) {
+ test2(42);
+ test1(23);
+ return 0;
}
diff --git a/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py b/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py
index dc7f4f9..42a95de 100644
--- a/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py
+++ b/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py
@@ -47,6 +47,6 @@ class TestDAP_optimized(lldbdap_testcase.DAPTestCaseBase):
self.assertTrue(optimized_variable["value"].startswith("<error:"))
error_msg = optimized_variable["$__lldb_extensions"]["error"]
self.assertTrue(
- ("Could not evaluate DW_OP_entry_value" in error_msg)
+ ("could not evaluate DW_OP_entry_value: no parent function" in error_msg)
or ("variable not available" in error_msg)
)
diff --git a/lldb/test/API/tools/lldb-server/TestPtyServer.py b/lldb/test/API/tools/lldb-server/TestPtyServer.py
index 4bfcf70..345f68f 100644
--- a/lldb/test/API/tools/lldb-server/TestPtyServer.py
+++ b/lldb/test/API/tools/lldb-server/TestPtyServer.py
@@ -7,6 +7,7 @@ from lldbgdbserverutils import *
import xml.etree.ElementTree as ET
+@skipIfRemote
@skipIf(hostoslist=["windows"])
class PtyServerTestCase(gdbremote_testcase.GdbRemoteTestCaseBase):
def setUp(self):
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test
index c57cefd..9c0510c 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test
@@ -1,9 +1,9 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/application_specific_info/asi.yaml > %t.dir/asi
# RUN: %lldb -o 'command script import lldb.macosx.crashlog' \
-# RUN: -o 'crashlog -a -i -t %t.dir/asi %S/Inputs/application_specific_info/asi.txt' \
+# RUN: -o 'crashlog -i -t %t.dir/asi %S/Inputs/application_specific_info/asi.txt' \
# RUN: -o "thread list" -o "bt all" 2>&1 | FileCheck %s
# CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test
index abd1e7c..eb1f5f4 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test
@@ -1,4 +1,4 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: %lldb -o 'command script import lldb.macosx.crashlog' \
# RUN: -o 'crashlog -V' \
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test
index fccd71c..684be28 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test
@@ -1,4 +1,4 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/interactive_crashlog/multithread-test.yaml > %t.dir/multithread-test
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test
index 6e2826e..271a4c2 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test
@@ -1,4 +1,4 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/interactive_crashlog/multithread-test.yaml > %t.dir/multithread-test
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test
index c2f6196..a17b7ac 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test
@@ -1,9 +1,9 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/application_specific_info/asi.yaml > %t.dir/asi
# RUN: %lldb -o 'command script import lldb.macosx.crashlog' \
-# RUN: -o 'crashlog -a -i -t %t.dir/asi %S/Inputs/application_specific_info/leb.txt' \
+# RUN: -o 'crashlog -i -t %t.dir/asi %S/Inputs/application_specific_info/leb.txt' \
# RUN: -o "thread list" -o "bt all" 2>&1 | FileCheck %s
# CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg
index 4170696..b72b294 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg
@@ -3,3 +3,8 @@ if 'system-darwin' not in config.available_features:
if 'lldb-repro' in config.available_features:
config.unsupported = True
+
+config.environment["LLDB_APPLE_DSYMFORUUID_EXECUTABLE"] = ""
+
+# Temporary parallel image loading deadlock workaround
+config.environment["NO_PARALLEL_IMG_LOADING"] = ""
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test
index 81e0686..64cd090 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test
@@ -1,4 +1,4 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/interactive_crashlog/multithread-test.yaml > %t.dir/multithread-test
diff --git a/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test b/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test
deleted file mode 100644
index d253981..0000000
--- a/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test
+++ /dev/null
@@ -1,36 +0,0 @@
-# Test definition DIE searching is delayed until complete type is required.
-
-# UNSUPPORTED: system-windows
-
-# RUN: split-file %s %t
-# RUN: %clangxx_host %t/main.cpp %t/t1_def.cpp -gdwarf -o %t.out
-# RUN: %lldb -b %t.out -s %t/lldb.cmd | FileCheck %s
-
-# CHECK: (lldb) p v1
-# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't2<t1>'
-# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't1'
-# CHECK: DW_TAG_structure_type (DW_TAG_structure_type) 't2<t1>' resolving forward declaration...
-# CHECK: (t2<t1>) {}
-# CHECK: (lldb) p v2
-# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't1'
-# CHECK: DW_TAG_structure_type (DW_TAG_structure_type) 't1' resolving forward declaration...
-
-#--- lldb.cmd
-log enable dwarf comp
-p v1
-p v2
-
-#--- main.cpp
-template<typename T>
-struct t2 {
-};
-struct t1;
-t2<t1> v1; // this CU doesn't have definition DIE for t1, but only declaration DIE for it.
-int main() {
-}
-
-#--- t1_def.cpp
-struct t1 { // this CU contains definition DIE for t1.
- int x;
-};
-t1 v2;
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/simple-template-names-context.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/simple-template-names-context.cpp
new file mode 100644
index 0000000..a8a4d3b
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/simple-template-names-context.cpp
@@ -0,0 +1,44 @@
+// Test that we can correctly resolve forward declared types when they only
+// differ in the template arguments of the surrounding context. The reproducer
+// is sensitive to the order of declarations, so we test in both directions.
+
+// REQUIRES: lld
+
+// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-a.o -g -gsimple-template-names -DFILE_A
+// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-b.o -g -gsimple-template-names -DFILE_B
+// RUN: ld.lld %t-a.o %t-b.o -o %t
+// RUN: %lldb %t -o "target variable --ptr-depth 1 --show-types both_a both_b" -o exit | FileCheck %s
+
+// CHECK: (lldb) target variable
+// CHECK-NEXT: (ReferencesBoth<'A'>) both_a = {
+// CHECK-NEXT: (Outer<'A'>::Inner *) a = 0x{{[0-9A-Fa-f]*}} {}
+// CHECK-NEXT: (Outer<'A'>::Inner *) b = 0x{{[0-9A-Fa-f]*}} {}
+// CHECK-NEXT: }
+// CHECK-NEXT: (ReferencesBoth<'B'>) both_b = {
+// CHECK-NEXT: (Outer<'A'>::Inner *) a = 0x{{[0-9A-Fa-f]*}} {}
+// CHECK-NEXT: (Outer<'B'>::Inner *) b = 0x{{[0-9A-Fa-f]*}} {}
+// CHECK-NEXT: }
+
+template<char C>
+struct Outer {
+ struct Inner {};
+};
+
+template<char C>
+struct ReferencesBoth {
+ Outer<'A'>::Inner *a;
+ Outer<'B'>::Inner *b;
+};
+
+#ifdef FILE_A
+Outer<'A'>::Inner inner_a;
+extern Outer<'B'>::Inner inner_b;
+
+ReferencesBoth<'A'> both_a{&inner_a, &inner_b};
+
+#else
+extern Outer<'A'>::Inner inner_a;
+Outer<'B'>::Inner inner_b;
+
+ReferencesBoth<'B'> both_b{&inner_a, &inner_b};
+#endif
diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp
index 602bd19..f9e0605 100644
--- a/lldb/unittests/Expression/DWARFExpressionTest.cpp
+++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp
@@ -33,23 +33,23 @@ static llvm::Expected<Scalar> Evaluate(llvm::ArrayRef<uint8_t> expr,
ExecutionContext *exe_ctx = nullptr) {
DataExtractor extractor(expr.data(), expr.size(), lldb::eByteOrderLittle,
/*addr_size*/ 4);
- Value result;
- Status status;
- if (!DWARFExpression::Evaluate(exe_ctx, /*reg_ctx*/ nullptr, module_sp,
- extractor, unit, lldb::eRegisterKindLLDB,
- /*initial_value_ptr*/ nullptr,
- /*object_address_ptr*/ nullptr, result,
- &status))
- return status.ToError();
-
- switch (result.GetValueType()) {
+
+ llvm::Expected<Value> result =
+ DWARFExpression::Evaluate(exe_ctx, /*reg_ctx*/ nullptr, module_sp,
+ extractor, unit, lldb::eRegisterKindLLDB,
+ /*initial_value_ptr*/ nullptr,
+ /*object_address_ptr*/ nullptr);
+ if (!result)
+ return result.takeError();
+
+ switch (result->GetValueType()) {
case Value::ValueType::Scalar:
- return result.GetScalar();
+ return result->GetScalar();
case Value::ValueType::LoadAddress:
return LLDB_INVALID_ADDRESS;
case Value::ValueType::HostAddress: {
// Convert small buffers to scalars to simplify the tests.
- DataBufferHeap &buf = result.GetBuffer();
+ DataBufferHeap &buf = result->GetBuffer();
if (buf.GetByteSize() <= 8) {
uint64_t val = 0;
memcpy(&val, buf.GetBytes(), buf.GetByteSize());
@@ -58,8 +58,9 @@ static llvm::Expected<Scalar> Evaluate(llvm::ArrayRef<uint8_t> expr,
}
[[fallthrough]];
default:
- return status.ToError();
+ break;
}
+ return llvm::createStringError("unsupported value type");
}
class DWARFExpressionTester : public YAMLModuleTester {
@@ -454,16 +455,15 @@ TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr) {
uint8_t expr[] = {DW_OP_addr, 0x40, 0x0, 0x0, 0x0};
DataExtractor extractor(expr, sizeof(expr), lldb::eByteOrderLittle,
/*addr_size*/ 4);
- Value result;
- Status status;
- ASSERT_TRUE(DWARFExpression::Evaluate(
+
+ llvm::Expected<Value> result = DWARFExpression::Evaluate(
&exe_ctx, /*reg_ctx*/ nullptr, /*module_sp*/ {}, extractor,
/*unit*/ nullptr, lldb::eRegisterKindLLDB,
/*initial_value_ptr*/ nullptr,
- /*object_address_ptr*/ nullptr, result, &status))
- << status.ToError();
+ /*object_address_ptr*/ nullptr);
- ASSERT_EQ(result.GetValueType(), Value::ValueType::LoadAddress);
+ ASSERT_THAT_EXPECTED(result, llvm::Succeeded());
+ ASSERT_EQ(result->GetValueType(), Value::ValueType::LoadAddress);
}
TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr_index) {
@@ -530,14 +530,14 @@ DWARF:
ExecutionContext exe_ctx(target_sp, false);
- auto evaluate = [&](DWARFExpression &expr, Status &status, Value &result) {
+ auto evaluate = [&](DWARFExpression &expr) -> llvm::Expected<Value> {
DataExtractor extractor;
expr.GetExpressionData(extractor);
- return DWARFExpression::Evaluate(
- &exe_ctx, /*reg_ctx*/ nullptr, /*module_sp*/ {}, extractor, dwarf_cu,
- lldb::eRegisterKindLLDB,
- /*initial_value_ptr*/ nullptr,
- /*object_address_ptr*/ nullptr, result, &status);
+ return DWARFExpression::Evaluate(&exe_ctx, /*reg_ctx*/ nullptr,
+ /*module_sp*/ {}, extractor, dwarf_cu,
+ lldb::eRegisterKindLLDB,
+ /*initial_value_ptr*/ nullptr,
+ /*object_address_ptr*/ nullptr);
};
// DW_OP_addrx takes a single leb128 operand, the index in the addr table:
@@ -546,16 +546,16 @@ DWARF:
/*addr_size*/ 4);
DWARFExpression expr(extractor);
- Status status;
- Value result;
- ASSERT_TRUE(evaluate(expr, status, result)) << status.ToError();
- ASSERT_EQ(result.GetValueType(), Value::ValueType::LoadAddress);
- ASSERT_EQ(result.GetScalar().UInt(), 0x5678u);
+ llvm::Expected<Value> result = evaluate(expr);
+ ASSERT_THAT_EXPECTED(result, llvm::Succeeded());
+ ASSERT_EQ(result->GetValueType(), Value::ValueType::LoadAddress);
+ ASSERT_EQ(result->GetScalar().UInt(), 0x5678u);
ASSERT_TRUE(expr.Update_DW_OP_addr(dwarf_cu, 0xdeadbeef));
- ASSERT_TRUE(evaluate(expr, status, result)) << status.ToError();
- ASSERT_EQ(result.GetValueType(), Value::ValueType::LoadAddress);
- ASSERT_EQ(result.GetScalar().UInt(), 0xdeadbeefu);
+ result = evaluate(expr);
+ ASSERT_THAT_EXPECTED(result, llvm::Succeeded());
+ ASSERT_EQ(result->GetValueType(), Value::ValueType::LoadAddress);
+ ASSERT_EQ(result->GetScalar().UInt(), 0xdeadbeefu);
}
class CustomSymbolFileDWARF : public SymbolFileDWARF {
@@ -825,15 +825,13 @@ TEST_F(DWARFExpressionMockProcessTest, DW_OP_piece_file_addr) {
DW_OP_addr, 0x50, 0x0, 0x0, 0x0, DW_OP_piece, 1};
DataExtractor extractor(expr, sizeof(expr), lldb::eByteOrderLittle,
/*addr_size*/ 4);
- Value result;
- Status status;
- ASSERT_TRUE(DWARFExpression::Evaluate(
+ llvm::Expected<Value> result = DWARFExpression::Evaluate(
&exe_ctx, /*reg_ctx*/ nullptr, /*module_sp*/ {}, extractor,
/*unit*/ nullptr, lldb::eRegisterKindLLDB,
/*initial_value_ptr*/ nullptr,
- /*object_address_ptr*/ nullptr, result, &status))
- << status.ToError();
+ /*object_address_ptr*/ nullptr);
- ASSERT_EQ(result.GetValueType(), Value::ValueType::HostAddress);
- ASSERT_THAT(result.GetBuffer().GetData(), ElementsAre(0x11, 0x22));
+ ASSERT_THAT_EXPECTED(result, llvm::Succeeded());
+ ASSERT_EQ(result->GetValueType(), Value::ValueType::HostAddress);
+ ASSERT_THAT(result->GetBuffer().GetData(), ElementsAre(0x11, 0x22));
}
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index bb67510..aa50ce3 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -504,6 +504,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
work-item Add product
IDs names.
+ ``gfx1152`` ``amdgcn`` APU - cumode - Architected *TBA*
+ - wavefrontsize64 flat
+ scratch .. TODO::
+ - Packed
+ work-item Add product
+ IDs names.
+
``gfx1200`` ``amdgcn`` dGPU - cumode - Architected *TBA*
- wavefrontsize64 flat
scratch .. TODO::
@@ -591,11 +598,13 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor
- ``gfx1102`` - Packed hazards specific to some targets
- ``gfx1103`` work-item within this family.
- ``gfx1150`` IDs
- - ``gfx1151`` Not all VGPRs can be used on:
+ - ``gfx1151``
+ - ``gfx1152`` Not all VGPRs can be used on:
- ``gfx1100``
- ``gfx1101``
- ``gfx1151``
+ - ``gfx1152``
SALU floating point instructions
and single-use VGPR hint
@@ -604,12 +613,14 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor
- ``gfx1150``
- ``gfx1151``
+ - ``gfx1152``
SGPRs are not supported for src1
in dpp instructions for:
- ``gfx1150``
- ``gfx1151``
+ - ``gfx1152``
``gfx12-generic`` ``amdgcn`` - ``gfx1200`` - wavefrontsize64 - Architected No restrictions.
@@ -1979,7 +1990,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10-3-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic``
- *reserved* 0x055 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1152`` 0x055 ``gfx1152``.
*reserved* 0x056 Reserved.
*reserved* 0x057 Reserved.
*reserved* 0x058 Reserved.
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 393b97e..c4a8562 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -116,6 +116,10 @@ Changes to the Hexagon Backend
Changes to the LoongArch Backend
--------------------------------
+* i32 is now a native type in the datalayout string. This enables
+ LoopStrengthReduce for loops with i32 induction variables, among other
+ optimizations.
+
Changes to the MIPS Backend
---------------------------
diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h
index a9431bc..d09018d 100644
--- a/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -20,12 +20,15 @@
namespace llvm {
class AssumptionCache;
class BasicBlock;
+class Instruction;
class Loop;
class Function;
template <class T> class SmallPtrSetImpl;
class TargetTransformInfo;
class Value;
+enum struct ConvergenceKind { None, Controlled, ExtendedLoop, Uncontrolled };
+
/// Utility to calculate the size and a few similar metrics for a set
/// of basic blocks.
struct CodeMetrics {
@@ -42,8 +45,8 @@ struct CodeMetrics {
/// one or more 'noduplicate' instructions.
bool notDuplicatable = false;
- /// True if this function contains a call to a convergent function.
- bool convergent = false;
+ /// The kind of convergence specified in this function.
+ ConvergenceKind Convergence = ConvergenceKind::None;
/// True if this function calls alloca (in the C sense).
bool usesDynamicAlloca = false;
@@ -77,7 +80,7 @@ struct CodeMetrics {
/// Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues,
- bool PrepareForLTO = false);
+ bool PrepareForLTO = false, const Loop *L = nullptr);
/// Collect a loop's ephemeral values (those used only by an assume
/// or similar intrinsics in the loop).
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 5208463..4f06a7e 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -649,6 +649,9 @@ int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0);
std::optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name);
+/// Find the convergence heart of the loop.
+CallBase *getLoopConvergenceHeart(const Loop *TheLoop);
+
/// Look for the loop attribute that requires progress within the loop.
/// Note: Most consumers probably want "isMustProgress" which checks
/// the containing function attribute too.
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index e12eb70..4021897 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -49,6 +49,7 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -142,6 +143,18 @@ TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8), "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("tan", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("tan", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("tanf", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tanf", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")
+
TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4), "_ZGV_LLVM_N4vv")
@@ -303,6 +316,22 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8), "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16), "_ZGV_LLVM_N16v")
+TLI_DEFINE_VECFUNC("tan", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("tan", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tan", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v")
+
+TLI_DEFINE_VECFUNC("tanf", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tanf", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("tanf", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")
+
TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8), "_ZGV_LLVM_N8vv")
@@ -1237,6 +1266,13 @@ TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd8_tan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd4_tan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd2_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+
TLI_DEFINE_VECFUNC("asin", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 69821c2..db6780b 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -147,6 +147,7 @@ enum Kind {
kw_aarch64_vector_pcs,
kw_aarch64_sve_vector_pcs,
kw_aarch64_sme_preservemost_from_x0,
+ kw_aarch64_sme_preservemost_from_x1,
kw_aarch64_sme_preservemost_from_x2,
kw_msp430_intrcc,
kw_avr_intrcc,
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 7364d61..dfba180 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -795,7 +795,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X55 = 0x055,
+ EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56 = 0x056,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X58 = 0x058,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ef4e0fd..9f8d3de 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1977,6 +1977,9 @@ public:
case Intrinsic::cos:
ISD = ISD::FCOS;
break;
+ case Intrinsic::tan:
+ ISD = ISD::FTAN;
+ break;
case Intrinsic::exp:
ISD = ISD::FEXP;
break;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 12e5b31..4365956 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -871,6 +871,14 @@ public:
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI,
BuildFnTy &MatchInfo);
+ bool matchAddOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
+ bool matchMulOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
+ bool matchSubOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
+ bool matchShlOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 2b3efc3..2273725 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -14,10 +14,12 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
#define LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
-#include "llvm/IR/Instructions.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
namespace llvm {
@@ -856,6 +858,43 @@ public:
};
};
+/// Represents a vscale.
+class GVScale : public GenericMachineInstr {
+public:
+ APInt getSrc() const { return getOperand(1).getCImm()->getValue(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_VSCALE;
+ };
+};
+
+/// Represents an integer subtraction.
+class GSub : public GIntBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SUB;
+ };
+};
+
+/// Represents an integer multiplication.
+class GMul : public GIntBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_MUL;
+ };
+};
+
+/// Represents a shift left.
+class GShl : public GenericMachineInstr {
+public:
+ Register getSrcReg() const { return getOperand(1).getReg(); }
+ Register getShiftReg() const { return getOperand(2).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SHL;
+ };
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 0f87e06..c8c86ed 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -415,6 +415,7 @@ enum NodeType {
STRICT_FLDEXP,
STRICT_FSIN,
STRICT_FCOS,
+ STRICT_FTAN,
STRICT_FEXP,
STRICT_FEXP2,
STRICT_FLOG,
@@ -934,6 +935,7 @@ enum NodeType {
FCBRT,
FSIN,
FCOS,
+ FTAN,
FPOW,
FPOWI,
/// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 9f8e846c..6e7292a 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -18,7 +18,6 @@
#define LLVM_CODEGEN_MACHINEFUNCTION_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"
@@ -34,6 +33,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Recycler.h"
#include "llvm/Target/TargetOptions.h"
+#include <bitset>
#include <cassert>
#include <cstdint>
#include <memory>
@@ -217,22 +217,21 @@ public:
}
MachineFunctionProperties &reset(const MachineFunctionProperties &MFP) {
- Properties.reset(MFP.Properties);
+ Properties &= ~MFP.Properties;
return *this;
}
// Returns true if all properties set in V (i.e. required by a pass) are set
// in this.
bool verifyRequiredProperties(const MachineFunctionProperties &V) const {
- return !V.Properties.test(Properties);
+ return (Properties | ~V.Properties).all();
}
/// Print the MachineFunctionProperties in human-readable form.
void print(raw_ostream &OS) const;
private:
- BitVector Properties =
- BitVector(static_cast<unsigned>(Property::LastProperty)+1);
+ std::bitset<static_cast<unsigned>(Property::LastProperty) + 1> Properties;
};
struct SEHHandler {
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 7970441..071a27a 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -330,9 +330,7 @@ template <typename... Preds> struct And {
template <typename Pred, typename... Preds>
struct And<Pred, Preds...> : And<Preds...> {
Pred P;
- And(Pred &&p, Preds &&...preds)
- : And<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {
- }
+ And(const Pred &p, const Preds &...preds) : And<Preds...>(preds...), P(p) {}
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
@@ -349,8 +347,7 @@ template <typename... Preds> struct Or {
template <typename Pred, typename... Preds>
struct Or<Pred, Preds...> : Or<Preds...> {
Pred P;
- Or(Pred &&p, Preds &&...preds)
- : Or<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {}
+ Or(const Pred &p, const Preds &...preds) : Or<Preds...>(preds...), P(p) {}
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
@@ -376,16 +373,16 @@ template <typename Pred> inline Not<Pred> m_Unless(const Pred &P) {
return Not{P};
}
-template <typename... Preds> And<Preds...> m_AllOf(Preds &&...preds) {
- return And<Preds...>(std::forward<Preds>(preds)...);
+template <typename... Preds> And<Preds...> m_AllOf(const Preds &...preds) {
+ return And<Preds...>(preds...);
}
-template <typename... Preds> Or<Preds...> m_AnyOf(Preds &&...preds) {
- return Or<Preds...>(std::forward<Preds>(preds)...);
+template <typename... Preds> Or<Preds...> m_AnyOf(const Preds &...preds) {
+ return Or<Preds...>(preds...);
}
-template <typename... Preds> auto m_NoneOf(Preds &&...preds) {
- return m_Unless(m_AnyOf(std::forward<Preds>(preds)...));
+template <typename... Preds> auto m_NoneOf(const Preds &...preds) {
+ return m_Unless(m_AnyOf(preds...));
}
// === Generic node matching ===
@@ -402,10 +399,8 @@ struct Operands_match<OpIdx, OpndPred, OpndPreds...>
: Operands_match<OpIdx + 1, OpndPreds...> {
OpndPred P;
- Operands_match(OpndPred &&p, OpndPreds &&...preds)
- : Operands_match<OpIdx + 1, OpndPreds...>(
- std::forward<OpndPreds>(preds)...),
- P(std::forward<OpndPred>(p)) {}
+ Operands_match(const OpndPred &p, const OpndPreds &...preds)
+ : Operands_match<OpIdx + 1, OpndPreds...>(preds...), P(p) {}
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
@@ -419,9 +414,8 @@ struct Operands_match<OpIdx, OpndPred, OpndPreds...>
};
template <typename... OpndPreds>
-auto m_Node(unsigned Opcode, OpndPreds &&...preds) {
- return m_AllOf(m_Opc(Opcode), Operands_match<0, OpndPreds...>(
- std::forward<OpndPreds>(preds)...));
+auto m_Node(unsigned Opcode, const OpndPreds &...preds) {
+ return m_AllOf(m_Opc(Opcode), Operands_match<0, OpndPreds...>(preds...));
}
/// Provide number of operands that are not chain or glue, as well as the first
@@ -647,10 +641,9 @@ template <typename Opnd> inline UnaryOpc_match<Opnd> m_ZExt(const Opnd &Op) {
return UnaryOpc_match<Opnd>(ISD::ZERO_EXTEND, Op);
}
-template <typename Opnd> inline auto m_SExt(Opnd &&Op) {
- return m_AnyOf(
- UnaryOpc_match<Opnd>(ISD::SIGN_EXTEND, Op),
- m_Node(ISD::SIGN_EXTEND_INREG, std::forward<Opnd>(Op), m_Value()));
+template <typename Opnd> inline auto m_SExt(const Opnd &Op) {
+ return m_AnyOf(UnaryOpc_match<Opnd>(ISD::SIGN_EXTEND, Op),
+ m_Node(ISD::SIGN_EXTEND_INREG, Op, m_Value()));
}
template <typename Opnd> inline UnaryOpc_match<Opnd> m_AnyExt(const Opnd &Op) {
@@ -663,30 +656,28 @@ template <typename Opnd> inline UnaryOpc_match<Opnd> m_Trunc(const Opnd &Op) {
/// Match a zext or identity
/// Allows to peek through optional extensions
-template <typename Opnd> inline auto m_ZExtOrSelf(Opnd &&Op) {
- return m_AnyOf(m_ZExt(std::forward<Opnd>(Op)), std::forward<Opnd>(Op));
+template <typename Opnd> inline auto m_ZExtOrSelf(const Opnd &Op) {
+ return m_AnyOf(m_ZExt(Op), Op);
}
/// Match a sext or identity
/// Allows to peek through optional extensions
-template <typename Opnd> inline auto m_SExtOrSelf(Opnd &&Op) {
- return m_AnyOf(m_SExt(std::forward<Opnd>(Op)), std::forward<Opnd>(Op));
+template <typename Opnd> inline auto m_SExtOrSelf(const Opnd &Op) {
+ return m_AnyOf(m_SExt(Op), Op);
}
/// Match a aext or identity
/// Allows to peek through optional extensions
template <typename Opnd>
-inline Or<UnaryOpc_match<Opnd>, Opnd> m_AExtOrSelf(Opnd &&Op) {
- return Or<UnaryOpc_match<Opnd>, Opnd>(m_AnyExt(std::forward<Opnd>(Op)),
- std::forward<Opnd>(Op));
+inline Or<UnaryOpc_match<Opnd>, Opnd> m_AExtOrSelf(const Opnd &Op) {
+ return Or<UnaryOpc_match<Opnd>, Opnd>(m_AnyExt(Op), Op);
}
/// Match a trunc or identity
/// Allows to peek through optional truncations
template <typename Opnd>
-inline Or<UnaryOpc_match<Opnd>, Opnd> m_TruncOrSelf(Opnd &&Op) {
- return Or<UnaryOpc_match<Opnd>, Opnd>(m_Trunc(std::forward<Opnd>(Op)),
- std::forward<Opnd>(Op));
+inline Or<UnaryOpc_match<Opnd>, Opnd> m_TruncOrSelf(const Opnd &Op) {
+ return Or<UnaryOpc_match<Opnd>, Opnd>(m_Trunc(Op), Op);
}
// === Constants ===
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 48cb0cd..7b0e5e7 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1893,7 +1893,8 @@ public:
const SDNode *N2);
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops);
+ ArrayRef<SDValue> Ops,
+ SDNodeFlags Flags = SDNodeFlags());
/// Fold floating-point operations when all operands are constants and/or
/// undefined.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d1912b1..aa7a32e 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -96,7 +96,7 @@ class Value;
namespace Sched {
-enum Preference {
+enum Preference : uint8_t {
None, // No preference
Source, // Follow source order.
RegPressure, // Scheduling for lowest register pressure.
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index a05d1a4..55e3202 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -267,6 +267,9 @@ namespace CallingConv {
/// Calling convention used for RISC-V V-extension.
RISCV_VectorCall = 110,
+ /// Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
+ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 = 111,
+
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 9dd1bb4..ad649b5 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1588,6 +1588,14 @@ public:
static CallBase *removeOperandBundle(CallBase *CB, uint32_t ID,
BasicBlock::iterator InsertPt);
+ /// Return the convergence control token for this call, if it exists.
+ Value *getConvergenceControlToken() const {
+ if (auto Bundle = getOperandBundle(llvm::LLVMContext::OB_convergencectrl)) {
+ return Bundle->Inputs[0].get();
+ }
+ return nullptr;
+ }
+
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Call ||
I->getOpcode() == Instruction::Invoke ||
@@ -2120,6 +2128,15 @@ public:
return Attrs.getParamStackAlignment(ArgNo);
}
+ /// Extract the byref type for a call or parameter.
+ Type *getParamByRefType(unsigned ArgNo) const {
+ if (auto *Ty = Attrs.getParamByRefType(ArgNo))
+ return Ty;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().getParamByRefType(ArgNo);
+ return nullptr;
+ }
+
/// Extract the byval type for a call or parameter.
Type *getParamByValType(unsigned ArgNo) const {
if (auto *Ty = Attrs.getParamByValType(ArgNo))
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index fcd3a10..9010e1a 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1799,17 +1799,14 @@ public:
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
- // Returns the convergence intrinsic referenced by |I|'s convergencectrl
- // attribute if any.
- static IntrinsicInst *getParentConvergenceToken(Instruction *I) {
- auto *CI = dyn_cast<llvm::CallInst>(I);
- if (!CI)
- return nullptr;
-
- auto Bundle = CI->getOperandBundle(llvm::LLVMContext::OB_convergencectrl);
- assert(Bundle->Inputs.size() == 1 &&
- Bundle->Inputs[0]->getType()->isTokenTy());
- return dyn_cast<llvm::IntrinsicInst>(Bundle->Inputs[0].get());
+ bool isAnchor() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_anchor;
+ }
+ bool isEntry() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_entry;
+ }
+ bool isLoop() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_loop;
}
};
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index fda2689..f63f54e 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -330,8 +330,6 @@ public:
unsigned Opcode;
if (auto *I = dyn_cast<Instruction>(V))
Opcode = I->getOpcode();
- else if (auto *CE = dyn_cast<ConstantExpr>(V))
- Opcode = CE->getOpcode();
else
return false;
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index a5a7288..e900bcd 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -197,6 +197,11 @@ HANDLE_LIBCALL(COS_F64, "cos")
HANDLE_LIBCALL(COS_F80, "cosl")
HANDLE_LIBCALL(COS_F128, "cosl")
HANDLE_LIBCALL(COS_PPCF128, "cosl")
+HANDLE_LIBCALL(TAN_F32, "tanf")
+HANDLE_LIBCALL(TAN_F64, "tan")
+HANDLE_LIBCALL(TAN_F80, "tanl")
+HANDLE_LIBCALL(TAN_F128,"tanl")
+HANDLE_LIBCALL(TAN_PPCF128, "tanl")
HANDLE_LIBCALL(SINCOS_F32, nullptr)
HANDLE_LIBCALL(SINCOS_F64, nullptr)
HANDLE_LIBCALL(SINCOS_F80, nullptr)
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index c4c1825..8803ef5 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
+void initializeExpandVariadicsPass(PassRegistry &);
void initializeExpandVectorPredicationPass(PassRegistry &);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h
index 0ce95c7..0c2b97a 100644
--- a/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -114,9 +114,11 @@ public:
return isTable() && hasTableType() &&
getTableType().ElemType == wasm::ValType::FUNCREF;
}
- void setFunctionTable() {
+ void setFunctionTable(bool is64) {
setType(wasm::WASM_SYMBOL_TYPE_TABLE);
- setTableType(wasm::ValType::FUNCREF);
+ uint8_t flags =
+ is64 ? wasm::WASM_LIMITS_FLAG_IS_64 : wasm::WASM_LIMITS_FLAG_NONE;
+ setTableType(wasm::ValType::FUNCREF, flags);
}
void setUsedInGOT() const { IsUsedInGOT = true; }
@@ -140,10 +142,11 @@ public:
return *TableType;
}
void setTableType(wasm::WasmTableType TT) { TableType = TT; }
- void setTableType(wasm::ValType VT) {
+ void setTableType(wasm::ValType VT,
+ uint8_t flags = wasm::WASM_LIMITS_FLAG_NONE) {
// Declare a table with element type VT and no limits (min size 0, no max
// size).
- wasm::WasmLimits Limits = {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
+ wasm::WasmLimits Limits = {flags, 0, 0};
setTableType({VT, Limits});
}
};
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 406144d..528abe1 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -199,7 +199,7 @@ struct Frame {
GlobalValue::GUID Function;
// The symbol name for the function. Only populated in the Frame by the reader
// if requested during initialization. This field should not be serialized.
- std::optional<std::string> SymbolName;
+ std::unique_ptr<std::string> SymbolName;
// The source line offset of the call from the beginning of parent function.
uint32_t LineOffset;
// The source column number of the call to help distinguish multiple calls
@@ -210,7 +210,9 @@ struct Frame {
Frame(const Frame &Other) {
Function = Other.Function;
- SymbolName = Other.SymbolName;
+ SymbolName = Other.SymbolName
+ ? std::make_unique<std::string>(*Other.SymbolName)
+ : nullptr;
LineOffset = Other.LineOffset;
Column = Other.Column;
IsInlineFrame = Other.IsInlineFrame;
@@ -228,7 +230,9 @@ struct Frame {
Frame &operator=(const Frame &Other) {
Function = Other.Function;
- SymbolName = Other.SymbolName;
+ SymbolName = Other.SymbolName
+ ? std::make_unique<std::string>(*Other.SymbolName)
+ : nullptr;
LineOffset = Other.LineOffset;
Column = Other.Column;
IsInlineFrame = Other.IsInlineFrame;
@@ -237,10 +241,10 @@ struct Frame {
bool operator!=(const Frame &Other) const { return !operator==(Other); }
- bool hasSymbolName() const { return SymbolName.has_value(); }
+ bool hasSymbolName() const { return !!SymbolName; }
StringRef getSymbolName() const {
- assert(SymbolName.has_value());
+ assert(hasSymbolName());
return *SymbolName;
}
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index 662c3ea4..1fa0d8c 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -1278,6 +1278,11 @@ inline Error createStringError(const Twine &S) {
}
template <typename... Ts>
+inline Error createStringError(char const *Fmt, const Ts &...Vals) {
+ return createStringError(llvm::inconvertibleErrorCode(), Fmt, Vals...);
+}
+
+template <typename... Ts>
inline Error createStringError(std::errc EC, char const *Fmt,
const Ts &... Vals) {
return createStringError(std::make_error_code(EC), Fmt, Vals...);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 1ea2652..bd43b95 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1605,6 +1605,37 @@ def insert_vector_elt_oob : GICombineRule<
[{ return Helper.matchInsertVectorElementOOB(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+def add_of_vscale : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_VSCALE $left, $imm1),
+ (G_VSCALE $right, $imm2),
+ (G_ADD $root, $left, $right, (MIFlags NoSWrap)),
+ [{ return Helper.matchAddOfVScale(${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def mul_of_vscale : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_VSCALE $left, $scale),
+ (G_CONSTANT $x, $imm1),
+ (G_MUL $root, $left, $x, (MIFlags NoSWrap)),
+ [{ return Helper.matchMulOfVScale(${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def shl_of_vscale : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_VSCALE $left, $imm),
+ (G_CONSTANT $x, $imm1),
+ (G_SHL $root, $left, $x, (MIFlags NoSWrap)),
+ [{ return Helper.matchShlOfVScale(${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def sub_of_vscale : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_VSCALE $right, $imm),
+ (G_SUB $root, $x, $right, (MIFlags NoSWrap)),
+ [{ return Helper.matchSubOfVScale(${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
// match_extract_of_element and insert_vector_elt_oob must be the first!
def vector_ops_combines: GICombineGroup<[
match_extract_of_element_undef_vector,
@@ -1637,7 +1668,11 @@ extract_vector_element_build_vector_trunc6,
extract_vector_element_build_vector_trunc7,
extract_vector_element_build_vector_trunc8,
extract_vector_element_shuffle_vector,
-insert_vector_element_extract_vector_element
+insert_vector_element_extract_vector_element,
+add_of_vscale,
+mul_of_vscale,
+shl_of_vscale,
+sub_of_vscale,
]>;
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 8fa0e4b..560d3b4 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -148,6 +148,7 @@ def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
def : GINodeEquiv<G_FCEIL, fceil>;
def : GINodeEquiv<G_FCOS, fcos>;
def : GINodeEquiv<G_FSIN, fsin>;
+def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FABS, fabs>;
def : GINodeEquiv<G_FSQRT, fsqrt>;
def : GINodeEquiv<G_FFLOOR, ffloor>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 1c95a60..15e02eb 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -509,6 +509,7 @@ def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>;
def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>;
def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>;
def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>;
+def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>;
def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>;
def fexp10 : SDNode<"ISD::FEXP10" , SDTFPUnaryOp>;
def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>;
@@ -562,6 +563,8 @@ def strict_fsin : SDNode<"ISD::STRICT_FSIN",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fcos : SDNode<"ISD::STRICT_FCOS",
SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ftan : SDNode<"ISD::STRICT_FTAN",
+ SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fpow : SDNode<"ISD::STRICT_FPOW",
@@ -1517,6 +1520,9 @@ def any_fsin : PatFrags<(ops node:$src),
def any_fcos : PatFrags<(ops node:$src),
[(strict_fcos node:$src),
(fcos node:$src)]>;
+def any_ftan : PatFrags<(ops node:$src),
+ [(strict_ftan node:$src),
+ (ftan node:$src)]>;
def any_fexp2 : PatFrags<(ops node:$src),
[(strict_fexp2 node:$src),
(fexp2 node:$src)]>;
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 5025ab2..afe6789 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -577,6 +577,11 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::AEK_SHA2, AArch64::AEK_AES,
AArch64::AEK_MTE, AArch64::AEK_SB,
AArch64::AEK_SSBS, AArch64::AEK_CSSC})},
+ {"oryon-1", ARMV8_6A,
+ (AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_CRYPTO,
+ AArch64::AEK_RAND, AArch64::AEK_SM4,
+ AArch64::AEK_SHA3, AArch64::AEK_SHA2,
+ AArch64::AEK_PROFILE}))},
};
// Name alias.
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 8fc6fa3..e03d8f6 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -105,6 +105,7 @@ enum GPUKind : uint32_t {
GK_GFX1103 = 93,
GK_GFX1150 = 94,
GK_GFX1151 = 95,
+ GK_GFX1152 = 96,
GK_GFX1200 = 100,
GK_GFX1201 = 101,
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index d3d3a9c..6ba04db 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -5143,9 +5143,7 @@ struct DenormalFPMathState : public AbstractState {
return Mode != Other.Mode || ModeF32 != Other.ModeF32;
}
- bool isValid() const {
- return Mode.isValid() && ModeF32.isValid();
- }
+ bool isValid() const { return Mode.isValid() && ModeF32.isValid(); }
static DenormalMode::DenormalModeKind
unionDenormalKind(DenormalMode::DenormalModeKind Callee,
@@ -5185,9 +5183,7 @@ struct DenormalFPMathState : public AbstractState {
// state.
DenormalState getAssumed() const { return Known; }
- bool isValidState() const override {
- return Known.isValid();
- }
+ bool isValidState() const override { return Known.isValid(); }
/// Return true if there are no dynamic components to the denormal mode worth
/// specializing.
@@ -5198,9 +5194,7 @@ struct DenormalFPMathState : public AbstractState {
Known.ModeF32.Output != DenormalMode::Dynamic;
}
- bool isAtFixpoint() const override {
- return IsAtFixedpoint;
- }
+ bool isAtFixpoint() const override { return IsAtFixedpoint; }
ChangeStatus indicateFixpoint() {
bool Changed = !IsAtFixedpoint;
diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
new file mode 100644
index 0000000..4c5a1b6
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
@@ -0,0 +1,40 @@
+//===- ExpandVariadics.h - expand variadic functions ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+class ModulePass;
+class OptimizationLevel;
+
+enum class ExpandVariadicsMode {
+ Unspecified, // Use the implementation defaults
+ Disable, // Disable the pass entirely
+ Optimize, // Optimise without changing ABI
+ Lowering, // Change variadic calling convention
+};
+
+class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
+ const ExpandVariadicsMode Mode;
+
+public:
+ // Operates under passed mode unless overridden on commandline
+ ExpandVariadicsPass(ExpandVariadicsMode Mode);
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+ModulePass *createExpandVariadicsPass(ExpandVariadicsMode);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index bd804dc..797c082 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -16,6 +16,7 @@
#define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/InstructionCost.h"
@@ -73,6 +74,7 @@ struct UnrollLoopOptions {
bool AllowExpensiveTripCount;
bool UnrollRemainder;
bool ForgetAllSCEV;
+ const Instruction *Heart = nullptr;
};
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
@@ -128,14 +130,15 @@ class UnrollCostEstimator {
public:
unsigned NumInlineCandidates;
- bool Convergent;
+ ConvergenceKind Convergence;
+ bool ConvergenceAllowsRuntime;
UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues,
unsigned BEInsns);
/// Whether it is legal to unroll this loop.
- bool canUnroll() const { return LoopSize.isValid() && !NotDuplicatable; }
+ bool canUnroll() const;
uint64_t getRolledLoopSize() const { return *LoopSize.getValue(); }
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 2637e2f..ea67b52 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
@@ -111,11 +112,24 @@ void CodeMetrics::collectEphemeralValues(
completeEphemeralValues(Visited, Worklist, EphValues);
}
+static bool extendsConvergenceOutsideLoop(const Instruction &I, const Loop *L) {
+ if (!L)
+ return false;
+ if (!isa<ConvergenceControlInst>(I))
+ return false;
+ for (const auto *U : I.users()) {
+ if (!L->contains(cast<Instruction>(U)))
+ return true;
+ }
+ return false;
+}
+
/// Fill in the current structure with information gleaned from the specified
/// block.
void CodeMetrics::analyzeBasicBlock(
const BasicBlock *BB, const TargetTransformInfo &TTI,
- const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
+ const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO,
+ const Loop *L) {
++NumBlocks;
InstructionCost NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
@@ -163,19 +177,38 @@ void CodeMetrics::analyzeBasicBlock(
if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
++NumVectorInsts;
- if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
+ if (I.getType()->isTokenTy() && !isa<ConvergenceControlInst>(I) &&
+ I.isUsedOutsideOfBlock(BB)) {
+ LLVM_DEBUG(dbgs() << I
+ << "\n Cannot duplicate a token value used outside "
+ "the current block (except convergence control).\n");
notDuplicatable = true;
-
- if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (CI->cannotDuplicate())
- notDuplicatable = true;
- if (CI->isConvergent())
- convergent = true;
}
- if (const InvokeInst *InvI = dyn_cast<InvokeInst>(&I))
- if (InvI->cannotDuplicate())
+ if (const CallBase *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->cannotDuplicate())
notDuplicatable = true;
+ // Compute a meet over the visited blocks for the following partial order:
+ //
+ // None -> { Controlled, ExtendedLoop, Uncontrolled}
+ // Controlled -> ExtendedLoop
+ if (Convergence <= ConvergenceKind::Controlled && CB->isConvergent()) {
+ if (isa<ConvergenceControlInst>(CB) ||
+ CB->getConvergenceControlToken()) {
+ assert(Convergence != ConvergenceKind::Uncontrolled);
+ LLVM_DEBUG(dbgs() << "Found controlled convergence:\n" << I << "\n");
+ if (extendsConvergenceOutsideLoop(I, L))
+ Convergence = ConvergenceKind::ExtendedLoop;
+ else {
+ assert(Convergence != ConvergenceKind::ExtendedLoop);
+ Convergence = ConvergenceKind::Controlled;
+ }
+ } else {
+ assert(Convergence == ConvergenceKind::None);
+ Convergence = ConvergenceKind::Uncontrolled;
+ }
+ }
+ }
NumInsts += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
}
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 369ab08..c34c497 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -1105,6 +1105,26 @@ int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
return getOptionalIntLoopAttribute(TheLoop, Name).value_or(Default);
}
+CallBase *llvm::getLoopConvergenceHeart(const Loop *TheLoop) {
+ BasicBlock *H = TheLoop->getHeader();
+ for (Instruction &II : *H) {
+ if (auto *CB = dyn_cast<CallBase>(&II)) {
+ if (!CB->isConvergent())
+ continue;
+ // This is the heart if it uses a token defined outside the loop. The
+ // verifier has already checked that only the loop intrinsic can use such
+ // a token.
+ if (auto *Token = CB->getConvergenceControlToken()) {
+ auto *TokenDef = cast<Instruction>(Token);
+ if (!TheLoop->contains(TokenDef->getParent()))
+ return CB;
+ }
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
+
bool llvm::isFinite(const Loop *L) {
return L->getHeader()->getParent()->willReturn();
}
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 08138a5..782c28c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7296,10 +7296,13 @@ static bool isGuaranteedNotToBeUndefOrPoison(
isa<ConstantPointerNull>(C) || isa<Function>(C))
return true;
- if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C))
- return (!includesUndef(Kind) ? !C->containsPoisonElement()
- : !C->containsUndefOrPoisonElement()) &&
- !C->containsConstantExpression();
+ if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) {
+ if (includesUndef(Kind) && C->containsUndefElement())
+ return false;
+ if (includesPoison(Kind) && C->containsPoisonElement())
+ return false;
+ return !C->containsConstantExpression();
+ }
}
// Strip cast operations from a pointer value.
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 9170942..30728ed 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -68,6 +68,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::tan:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index d3ab306..7d7fe19 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -604,6 +604,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(aarch64_vector_pcs);
KEYWORD(aarch64_sve_vector_pcs);
KEYWORD(aarch64_sme_preservemost_from_x0);
+ KEYWORD(aarch64_sme_preservemost_from_x1);
KEYWORD(aarch64_sme_preservemost_from_x2);
KEYWORD(msp430_intrcc);
KEYWORD(avr_intrcc);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 07c8aa2..f0fde9a 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -2153,6 +2153,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'aarch64_vector_pcs'
/// ::= 'aarch64_sve_vector_pcs'
/// ::= 'aarch64_sme_preservemost_from_x0'
+/// ::= 'aarch64_sme_preservemost_from_x1'
/// ::= 'aarch64_sme_preservemost_from_x2'
/// ::= 'msp430_intrcc'
/// ::= 'avr_intrcc'
@@ -2212,6 +2213,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_aarch64_sme_preservemost_from_x0:
CC = CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0;
break;
+ case lltok::kw_aarch64_sme_preservemost_from_x1:
+ CC = CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1;
+ break;
case lltok::kw_aarch64_sme_preservemost_from_x2:
CC = CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2;
break;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index b4765fb..66b1c5f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT.
+// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT,
+// G_INSERT_VECTOR_ELT, and G_VSCALE
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -400,3 +401,86 @@ bool CombinerHelper::matchInsertVectorElementOOB(MachineInstr &MI,
return false;
}
+
+bool CombinerHelper::matchAddOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GAdd *Add = cast<GAdd>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getLHSReg()));
+ GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getRHSReg()));
+
+ Register Dst = Add->getReg(0);
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(RHSVScale->getReg(0)))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc() + RHSVScale->getSrc());
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchMulOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GMul *Mul = cast<GMul>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Mul->getLHSReg()));
+
+ std::optional<APInt> MaybeRHS = getIConstantVRegVal(Mul->getRHSReg(), MRI);
+ if (!MaybeRHS)
+ return false;
+
+ Register Dst = MO.getReg();
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc() * *MaybeRHS);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchSubOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GSub *Sub = cast<GSub>(MRI.getVRegDef(MO.getReg()));
+ GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Sub->getRHSReg()));
+
+ Register Dst = MO.getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!MRI.hasOneNonDBGUse(RHSVScale->getReg(0)) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, DstTy}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto VScale = B.buildVScale(DstTy, -RHSVScale->getSrc());
+ B.buildAdd(Dst, Sub->getLHSReg(), VScale, Sub->getFlags());
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchShlOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GShl *Shl = cast<GShl>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Shl->getSrcReg()));
+
+ std::optional<APInt> MaybeRHS = getIConstantVRegVal(Shl->getShiftReg(), MRI);
+ if (!MaybeRHS)
+ return false;
+
+ Register Dst = MO.getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_VSCALE, DstTy}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc().shl(*MaybeRHS));
+ };
+
+ return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6f0cae2..9830b52 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -449,6 +449,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(SIN_F);
case TargetOpcode::G_FCOS:
RTLIBCASE(COS_F);
+ case TargetOpcode::G_FTAN:
+ RTLIBCASE(TAN_F);
case TargetOpcode::G_FLOG10:
RTLIBCASE(LOG10_F);
case TargetOpcode::G_FLOG:
@@ -1037,6 +1039,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FREM:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
@@ -2893,6 +2896,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FFLOOR:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
@@ -4659,6 +4663,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_INTRINSIC_TRUNC:
case G_FCOS:
case G_FSIN:
+ case G_FTAN:
case G_FSQRT:
case G_BSWAP:
case G_BITREVERSE:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index e8438be..129e696 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -833,6 +833,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FREM:
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FMAD:
if (SNaN)
@@ -1713,6 +1714,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FREM:
case TargetOpcode::G_FRINT:
case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_INTRINSIC_ROUND:
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 3397bd0..a808a54 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1339,14 +1339,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (SrcIdx && DstIdx)
return false;
- [[maybe_unused]] const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
+ const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
if (DstReg.isPhysical()) {
Register NewDstReg = DstReg;
- unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
- DefMI->getOperand(0).getSubReg());
+ unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx);
if (NewDstIdx)
NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9a53590..02cd125 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4041,17 +4041,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
}
- // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
- if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
- if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
- SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
- SDValue S0 = N1.getOperand(0);
- if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
- if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
- if (C->getAPIntValue() == (BitWidth - 1))
- return DAG.getNode(ISD::ABS, DL, VT, S0);
- }
- }
+ // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
+ if (hasOperation(ISD::ABS, VT) &&
+ sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
+ sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
+ return DAG.getNode(ISD::ABS, DL, VT, A);
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 8cd2bb6..27c45ca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4514,6 +4514,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::COS_F80, RTLIB::COS_F128,
RTLIB::COS_PPCF128, Results);
break;
+ case ISD::FTAN:
+ case ISD::STRICT_FTAN:
+ ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80,
+ RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results);
+ break;
case ISD::FSINCOS:
// Expand into sincos libcall.
ExpandSinCosLibCall(Node, Results);
@@ -5468,6 +5473,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
case ISD::FLOG:
case ISD::FLOG2:
case ISD::FLOG10:
@@ -5492,6 +5498,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::STRICT_FSQRT:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
+ case ISD::STRICT_FTAN:
case ISD::STRICT_FLOG:
case ISD::STRICT_FLOG2:
case ISD::STRICT_FLOG10:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index fb1424f..aa116c9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -131,6 +131,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::STRICT_FSUB:
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::STRICT_FTAN:
+ case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
@@ -774,6 +776,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, RTLIB::TAN_F64,
+ RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
@@ -1330,7 +1338,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to expand the result of this "
"operator!");
-
+ // clang-format off
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
@@ -1399,6 +1407,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
case ISD::STRICT_FSUB:
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::STRICT_FTAN:
+ case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
@@ -1408,6 +1418,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ // clang-format on
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1768,6 +1779,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32,
+ RTLIB::TAN_F64, RTLIB::TAN_F80,
+ RTLIB::TAN_F128, RTLIB::TAN_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -2479,6 +2499,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
+ case ISD::FTAN:
case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
// Binary FP Operations
@@ -2914,6 +2935,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
+ case ISD::FTAN:
case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
// Binary FP Operations
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bec9cb4..2350b56 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -586,6 +586,7 @@ private:
SDValue SoftenFloatRes_FSIN(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTAN(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
SDValue SoftenFloatRes_LOAD(SDNode *N);
SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N);
@@ -635,6 +636,7 @@ private:
SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
SDValue &Lo, SDValue &Hi);
+ // clang-format off
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -667,9 +669,11 @@ private:
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ // clang-format on
// Float Operand Expansion.
bool ExpandFloatOperand(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 6acbc04..8cdb4ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -397,6 +397,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
case ISD::FLDEXP:
case ISD::FPOWI:
case ISD::FPOW:
@@ -506,7 +507,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
break; \
} \
/* Defer non-vector results to LegalizeDAG. */ \
- if (!Node->getValueType(0).isVector()) { \
+ if (!Node->getValueType(0).isVector() && \
+ Node->getValueType(0) != MVT::Other) { \
Action = TargetLowering::Legal; \
break; \
} \
@@ -990,11 +992,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
- if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) {
- Results.push_back(Expanded);
- return;
- }
- break;
+ Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
+ return;
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 361416e..92ce3b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -108,6 +108,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
+ case ISD::FTAN:
case ISD::FTRUNC:
case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
@@ -1140,6 +1141,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT: case ISD::VP_SQRT:
+ case ISD::FTAN:
case ISD::FTRUNC:
case ISD::VP_FROUNDTOZERO:
case ISD::SINT_TO_FP:
@@ -4400,6 +4402,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
+ case ISD::FTAN:
case ISD::FTRUNC:
if (unrollExpandedOp())
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4a6a431..e176cf2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5375,6 +5375,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FREM:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
case ISD::FMA:
case ISD::FMAD: {
if (SNaN)
@@ -6332,7 +6333,8 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, ArrayRef<SDValue> Ops) {
+ EVT VT, ArrayRef<SDValue> Ops,
+ SDNodeFlags Flags) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
@@ -6689,7 +6691,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// Constant fold the scalar operands.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
@@ -7260,7 +7262,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
// Perform trivial constant folding.
- if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
+ if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
return SV;
// Canonicalize an UNDEF to the RHS, even over a constant.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ba76456..2f3626f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1684,7 +1684,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
- Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder);
+ Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, Order);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
@@ -1699,11 +1699,10 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
}
// We have created a SDDbgOperand for each Value in Values.
- // Should use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
- SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
- /*IsIndirect=*/false, DbgLoc,
- SDNodeOrder, IsVariadic);
+ SDDbgValue *SDV =
+ DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
+ /*IsIndirect=*/false, DbgLoc, Order, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
@@ -6742,6 +6741,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::tan:
case Intrinsic::exp10:
case Intrinsic::floor:
case Intrinsic::ceil:
@@ -6759,6 +6759,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs: Opcode = ISD::FABS; break;
case Intrinsic::sin: Opcode = ISD::FSIN; break;
case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::tan: Opcode = ISD::FTAN; break;
case Intrinsic::exp10: Opcode = ISD::FEXP10; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
@@ -9160,6 +9161,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ if (visitUnaryFloatCall(I, ISD::FTAN))
+ return;
+ break;
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 2198c23..52da24b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -210,6 +210,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
+ case ISD::FTAN: return "ftan";
+ case ISD::STRICT_FTAN: return "strict_ftan";
case ISD::FTRUNC: return "ftrunc";
case ISD::STRICT_FTRUNC: return "strict_ftrunc";
case ISD::FFLOOR: return "ffloor";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f856c8a..e1c1a6b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8427,10 +8427,6 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
bool IsMax = Opc == ISD::FMAXIMUM;
SDNodeFlags Flags = N->getFlags();
- if (VT.isVector() &&
- isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
- return SDValue();
-
// First, implement comparison not propagating NaN. If no native fmin or fmax
// available, use plain select with setcc instead.
SDValue MinMax;
@@ -8447,6 +8443,9 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
} else if (isOperationLegalOrCustom(CompOpc, VT)) {
MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
} else {
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(N);
+
// NaN (if exists) will be propagated later, so orderness doesn't matter.
SDValue Compare =
DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
@@ -9159,6 +9158,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
return DAG.getNode(ISD::SMAX, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
@@ -9175,8 +9175,8 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
- Op = DAG.getFreeze(Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
return DAG.getNode(ISD::SMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3aec704..8240a1f 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -141,6 +141,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::EXP10_F128, "exp10f128");
setLibcallName(RTLIB::SIN_F128, "sinf128");
setLibcallName(RTLIB::COS_F128, "cosf128");
+ setLibcallName(RTLIB::TAN_F128, "tanf128");
setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
setLibcallName(RTLIB::POW_F128, "powf128");
setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite");
@@ -1015,7 +1016,8 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
- ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN},
+ ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN,
+ ISD::FTAN},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 0046220..f44a6a4 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1183,8 +1183,7 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID,
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
resolveRelocation(Section, Offset,
- (uint64_t)Section.getAddressWithOffset(i->second),
- RelType, 0);
+ Section.getLoadAddressWithOffset(i->second), RelType, 0);
LLVM_DEBUG(dbgs() << " Stub function found\n");
} else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) {
// Create a new stub function.
@@ -1217,8 +1216,7 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID,
addRelocationForSection(REmovk_g0, Value.SectionID);
}
resolveRelocation(Section, Offset,
- reinterpret_cast<uint64_t>(Section.getAddressWithOffset(
- Section.getStubOffset())),
+ Section.getLoadAddressWithOffset(Section.getStubOffset()),
RelType, 0);
Section.advanceStubOffset(getMaxStubSize());
}
@@ -1349,10 +1347,9 @@ RuntimeDyldELF::processRelocationRef(
// Look for an existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
- resolveRelocation(
- Section, Offset,
- reinterpret_cast<uint64_t>(Section.getAddressWithOffset(i->second)),
- RelType, 0);
+ resolveRelocation(Section, Offset,
+ Section.getLoadAddressWithOffset(i->second), RelType,
+ 0);
LLVM_DEBUG(dbgs() << " Stub function found\n");
} else {
// Create a new stub function.
@@ -1367,10 +1364,10 @@ RuntimeDyldELF::processRelocationRef(
else
addRelocationForSection(RE, Value.SectionID);
- resolveRelocation(Section, Offset, reinterpret_cast<uint64_t>(
- Section.getAddressWithOffset(
- Section.getStubOffset())),
- RelType, 0);
+ resolveRelocation(
+ Section, Offset,
+ Section.getLoadAddressWithOffset(Section.getStubOffset()), RelType,
+ 0);
Section.advanceStubOffset(getMaxStubSize());
}
} else {
@@ -1609,8 +1606,7 @@ RuntimeDyldELF::processRelocationRef(
if (i != Stubs.end()) {
// Symbol function stub already created, just relocate to it
resolveRelocation(Section, Offset,
- reinterpret_cast<uint64_t>(
- Section.getAddressWithOffset(i->second)),
+ Section.getLoadAddressWithOffset(i->second),
RelType, 0);
LLVM_DEBUG(dbgs() << " Stub function found\n");
} else {
@@ -1652,10 +1648,10 @@ RuntimeDyldELF::processRelocationRef(
addRelocationForSection(REl, Value.SectionID);
}
- resolveRelocation(Section, Offset, reinterpret_cast<uint64_t>(
- Section.getAddressWithOffset(
- Section.getStubOffset())),
- RelType, 0);
+ resolveRelocation(
+ Section, Offset,
+ Section.getLoadAddressWithOffset(Section.getStubOffset()),
+ RelType, 0);
Section.advanceStubOffset(getMaxStubSize());
}
if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID)) {
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 2c4b452..92213e1 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3961,7 +3961,7 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
// Loop is not unrollable if the loop contains certain instructions.
- if (!UCE.canUnroll() || UCE.Convergent) {
+ if (!UCE.canUnroll()) {
LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
return 1;
}
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 7a5f18f..0bf8be9 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -326,6 +326,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
Out << "aarch64_sme_preservemost_from_x0";
break;
+ case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1:
+ Out << "aarch64_sme_preservemost_from_x1";
+ break;
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
Out << "aarch64_sme_preservemost_from_x2";
break;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index a7ed2de..2f4b835 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5368,8 +5368,8 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
- if (T.isRISCV64()) {
- // Make i32 a native type for 64-bit RISC-V.
+ if (T.isLoongArch64() || T.isRISCV64()) {
+ // Make i32 a native type for 64-bit LoongArch and RISC-V.
auto I = DL.find("-n64-");
if (I != StringRef::npos)
return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 985f9351..788e92f 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -877,7 +877,7 @@ void WasmObjectWriter::writeImportSection(ArrayRef<wasm::WasmImport> Imports,
break;
case wasm::WASM_EXTERNAL_TABLE:
W->OS << char(Import.Table.ElemType);
- encodeULEB128(0, W->OS); // flags
+ encodeULEB128(Import.Table.Limits.Flags, W->OS);
encodeULEB128(NumElements, W->OS); // initial
break;
case wasm::WASM_EXTERNAL_TAG:
@@ -1022,7 +1022,8 @@ void WasmObjectWriter::writeElemSection(
encodeULEB128(TableNumber, W->OS); // the table number
// init expr for starting offset
- W->OS << char(wasm::WASM_OPCODE_I32_CONST);
+ W->OS << char(is64Bit() ? wasm::WASM_OPCODE_I64_CONST
+ : wasm::WASM_OPCODE_I32_CONST);
encodeSLEB128(InitialTableOffset, W->OS);
W->OS << char(wasm::WASM_OPCODE_END);
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 2b6bdbf..cbc55a1 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -586,6 +586,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1150";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151:
return "gfx1151";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152:
+ return "gfx1152";
// AMDGCN GFX12.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200:
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 8e2a948..0fee299 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -611,6 +611,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1103, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1150, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1152, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 316d05b..8dd060d 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -139,6 +139,7 @@
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca..dad9714 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
MODULE_PASS("dxil-upgrade", DXILUpgradePass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
+MODULE_PASS("expand-variadics", ExpandVariadicsPass(ExpandVariadicsMode::Disable))
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globalopt", GlobalOptPass())
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index fc3be71..693897f 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -690,7 +690,7 @@ Error RawMemProfReader::readNextRecord(
return F;
auto Iter = this->GuidToSymbolName.find(F.Function);
assert(Iter != this->GuidToSymbolName.end());
- F.SymbolName = Iter->getSecond();
+ F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
return F;
};
return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index fcefdef9..7360901 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -867,21 +867,16 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
// Any intermediate directories we create should be accessible by
// the owner, even if Perms says otherwise for the final path.
const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
+
+ StringRef Name = *I;
while (true) {
- StringRef Name = *I;
- detail::InMemoryNode *Node = Dir->getChild(Name);
+ Name = *I;
++I;
+ if (I == E)
+ break;
+ detail::InMemoryNode *Node = Dir->getChild(Name);
if (!Node) {
- if (I == E) {
- // End of the path.
- Dir->addChild(
- Name, MakeNode({Dir->getUniqueID(), Path, Name, ModificationTime,
- std::move(Buffer), ResolvedUser, ResolvedGroup,
- ResolvedType, ResolvedPerms}));
- return true;
- }
-
- // Create a new directory. Use the path up to here.
+ // This isn't the last element, so we create a new directory.
Status Stat(
StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
getDirectoryID(Dir->getUniqueID(), Name),
@@ -891,27 +886,33 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
Name, std::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
continue;
}
+ // Creating file under another file.
+ if (!isa<detail::InMemoryDirectory>(Node))
+ return false;
+ Dir = cast<detail::InMemoryDirectory>(Node);
+ }
+ detail::InMemoryNode *Node = Dir->getChild(Name);
+ if (!Node) {
+ Dir->addChild(Name,
+ MakeNode({Dir->getUniqueID(), Path, Name, ModificationTime,
+ std::move(Buffer), ResolvedUser, ResolvedGroup,
+ ResolvedType, ResolvedPerms}));
+ return true;
+ }
+ if (isa<detail::InMemoryDirectory>(Node))
+ return ResolvedType == sys::fs::file_type::directory_file;
- if (auto *NewDir = dyn_cast<detail::InMemoryDirectory>(Node)) {
- Dir = NewDir;
- } else {
- assert((isa<detail::InMemoryFile>(Node) ||
- isa<detail::InMemoryHardLink>(Node)) &&
- "Must be either file, hardlink or directory!");
-
- // Trying to insert a directory in place of a file.
- if (I != E)
- return false;
+ assert((isa<detail::InMemoryFile>(Node) ||
+ isa<detail::InMemoryHardLink>(Node)) &&
+ "Must be either file, hardlink or directory!");
- // Return false only if the new file is different from the existing one.
- if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node)) {
- return Link->getResolvedFile().getBuffer()->getBuffer() ==
- Buffer->getBuffer();
- }
- return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() ==
- Buffer->getBuffer();
- }
+ // Return false only if the new file is different from the existing one.
+ if (auto *Link = dyn_cast<detail::InMemoryHardLink>(Node)) {
+ return Link->getResolvedFile().getBuffer()->getBuffer() ==
+ Buffer->getBuffer();
}
+ return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() ==
+ Buffer->getBuffer();
}
bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 4b2ce0d..5708b61 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -85,6 +85,10 @@ def SMEUnsupported : AArch64Unsupported {
SME2Unsupported.F);
}
+def MTEUnsupported : AArch64Unsupported {
+ let F = [HasMTE];
+}
+
let F = [HasPAuth, HasPAuthLR] in
def PAUnsupported : AArch64Unsupported;
@@ -109,6 +113,7 @@ include "AArch64SchedNeoverseN1.td"
include "AArch64SchedNeoverseN2.td"
include "AArch64SchedNeoverseV1.td"
include "AArch64SchedNeoverseV2.td"
+include "AArch64SchedOryon.td"
include "AArch64Processors.td"
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 32646c6..941990c 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -589,6 +589,14 @@ def CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
(sequence "X%u",19, 28),
LR, FP)>;
+// SME ABI support routines such as __arm_get_current_vg preserve most registers.
+def CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
+ : CalleeSavedRegs<(add (sequence "Z%u", 0, 31),
+ (sequence "P%u", 0, 15),
+ (sequence "X%u", 1, 15),
+ (sequence "X%u",19, 28),
+ LR, FP)>;
+
// SME ABI support routines __arm_sme_state preserves most registers.
def CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
: CalleeSavedRegs<(add (sequence "Z%u", 0, 31),
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 8d16709..a759efc 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -617,6 +617,27 @@ def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B",
FeatureLdpAlignedOnly,
FeatureStpAlignedOnly]>;
+def TuneOryon : SubtargetFeature<"oryon-1", "ARMProcFamily",
+ "Oryon",
+ "Nuvia Inc Oryon processors", [
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureFuseAES,
+ FeatureFuseAdrpAdd,
+ FeatureEnableSelectOptimize,
+ FeatureFuseCryptoEOR,
+ FeatureFuseAddress,
+ FeatureSM4,
+ FeatureSHA2,
+ FeatureSHA3,
+ FeatureAES,
+ FeatureFullFP16,
+ FeatureFP16FML,
+ FeaturePerfMon,
+ FeatureSPE,
+ FeaturePostRAScheduler,
+ HasV8_6aOps]>;
def ProcessorFeatures {
list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
@@ -806,6 +827,11 @@ def ProcessorFeatures {
FeatureSHA3, FeatureAES, FeatureCSSC,
FeatureWFxT, FeatureFullFP16];
+ list<SubtargetFeature> Oryon = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
+ FeatureCrypto, FeatureRandGen,
+ FeaturePAuth, FeatureSM4, FeatureSHA2,
+ FeatureSHA3, FeatureAES];
+
// ETE and TRBE are future architecture extensions. We temporarily enable them
// by default for users targeting generic AArch64. The extensions do not
// affect code generated by the compiler and can be used only by explicitly
@@ -988,3 +1014,7 @@ def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A,
def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B,
[TuneAmpere1B]>;
+
+// Qualcomm Oryon
+def : ProcessorModel<"oryon-1", OryonModel, ProcessorFeatures.Oryon,
+ [TuneOryon]>;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index e97d7e3..cc50b59 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -107,13 +107,22 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
report_fatal_error(
- "Calling convention AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is "
- "only supported to improve calls to SME ACLE save/restore/disable-za "
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is only "
+ "supported to improve calls to SME ACLE save/restore/disable-za "
"functions, and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() ==
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
+ report_fatal_error(
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is "
+ "only supported to improve calls to SME ACLE __arm_get_current_vg "
+ "function, and is not intended to be used beyond that scope.");
+ if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
report_fatal_error(
- "Calling convention AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
"only supported to improve calls to SME ACLE __arm_sme_state "
"and is not intended to be used beyond that scope.");
if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
@@ -153,13 +162,22 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
report_fatal_error(
- "Calling convention AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is "
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is "
"only supported to improve calls to SME ACLE save/restore/disable-za "
"functions, and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() ==
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
+ report_fatal_error(
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is "
+ "only supported to improve calls to SME ACLE __arm_get_current_vg "
+ "function, and is not intended to be used beyond that scope.");
+ if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
report_fatal_error(
- "Calling convention AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
"only supported to improve calls to SME ACLE __arm_sme_state "
"and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
@@ -236,6 +254,8 @@ AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF,
"Calling convention SVE_VectorCall is unsupported on Darwin.");
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
+ if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
+ return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask;
if (CC == CallingConv::CFGuard_Check)
@@ -282,6 +302,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: CSR_AArch64_SVE_AAPCS_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
+ if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
+ return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask;
if (CC == CallingConv::CFGuard_Check)
@@ -643,6 +665,7 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
+ case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
if (STI.isTargetWindows())
return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
diff --git a/llvm/lib/Target/AArch64/AArch64SchedOryon.td b/llvm/lib/Target/AArch64/AArch64SchedOryon.td
new file mode 100644
index 0000000..09d1af2
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedOryon.td
@@ -0,0 +1,1659 @@
+//=- AArch64SchedOryon.td - Qualcomm Oryon CPU 001 ---*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for Qualcomm Oryon
+// family of processors.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pipeline Description.
+
+def OryonModel : SchedMachineModel {
+ let IssueWidth = 14;
+ let MicroOpBufferSize = 376;
+ let LoadLatency = 4;
+ let MispredictPenalty = 13; // 13 cycles for mispredicted branch.
+ let LoopMicroOpBufferSize = 0; // Do not have a LoopMicroOpBuffer
+ let PostRAScheduler = 1; // Using PostRA sched.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ SMEUnsupported.F,
+ MTEUnsupported.F,
+ PAUnsupported.F,
+ [HasPAuth, HasCSSC]);
+}
+
+let SchedModel = OryonModel in {
+
+// Issue ports.
+// IXU has 6 ports p0 ~ p5
+// LSU has 4 ports p6 ~ p9(ls0 ~ ls3), p10/p11(std0, std1) has to work with ls0~ls3
+// VXU has 4 ports p12 ~ p15
+
+// cross IXU/LSU/VXU resource group for FMOV P41 of VXU
+// I2V
+def ORYONI4FP0 : ProcResource<1>;
+def ORYONI5FP1 : ProcResource<1>;
+// V2I
+def ORYONFP0I4 : ProcResource<1>;
+def ORYONFP1I5 : ProcResource<1>;
+
+// store 1 for normal store instructions
+def ORYONST0 : ProcResource<1>;
+// store 2 for normal store instructions
+def ORYONST1 : ProcResource<1>;
+
+// Port 0: ALU/Indirect/Direct Branch.
+def ORYONP0 : ProcResource<1>;
+
+// Port 1: ALU/Direct Branch.
+def ORYONP1 : ProcResource<1>;
+
+// Port 2: ALU.
+def ORYONP2 : ProcResource<1>;
+
+// Port 3: ALU.
+def ORYONP3 : ProcResource<1>;
+
+// Port 4: ALU.
+def ORYONP4 : ProcResource<1> {
+ let Super = ORYONI4FP0;
+ let Super = ORYONFP0I4; }
+
+// Port 5: ALU.
+def ORYONP5 : ProcResource<1> {
+ let Super = ORYONI5FP1;
+ let Super = ORYONFP1I5; }
+
+// Port 6: Load/Store. LS0
+def ORYONP6 : ProcResource<1> {
+ let Super = ORYONST0; }
+
+// Port 7: Load/store. LS1
+def ORYONP7 : ProcResource<1> {
+ let Super = ORYONST0; }
+
+// Port 8: Load/Store. LS2
+def ORYONP8 : ProcResource<1> {
+ let Super = ORYONST1; }
+
+// Port 9: Load/store. LS3
+def ORYONP9 : ProcResource<1> {
+ let Super = ORYONST1; }
+
+// Port 10: Load/Store. STD0
+def ORYONP10SD0 : ProcResource<1> {
+ let Super = ORYONST0; }
+
+// Port 11: Load/store. STD1
+def ORYONP11SD1 : ProcResource<1> {
+ let Super = ORYONST1; }
+
+// Port 12: FP/Neon/SIMD/Crypto.
+def ORYONP12FP0 : ProcResource<1> {
+ let Super = ORYONI4FP0;
+ let Super = ORYONFP0I4; }
+
+// Port 13: FP/Neon/SIMD/Crypto.
+def ORYONP13FP1 : ProcResource<1> {
+ let Super = ORYONI5FP1;
+ let Super = ORYONFP1I5; }
+
+// Port 14: FP/Neon/SIMD/Crypto.
+def ORYONP14FP2 : ProcResource<1>;
+
+// Port 15: FP/Neon/SIMD/Crypto.
+def ORYONP15FP3 : ProcResource<1>;
+
+// Define groups for the functional units on each issue port. Each group
+// created will be used by a WriteRes.
+
+// Integer add/shift/logical/misc. instructions on port I0/I1/I2/I3/I4/I5.
+def ORYONI012345 : ProcResGroup<[ORYONP0, ORYONP1, ORYONP2,
+ ORYONP3, ORYONP4, ORYONP5]> {
+ let BufferSize = 120;
+}
+
+// Direct Conditional Branch instructions on ports I0/I1.
+def ORYONI01 : ProcResGroup<[ORYONP0, ORYONP1]> {
+ let BufferSize = 40;
+}
+
+// Indirect/crypto Conditional Branch instructions on ports I0.
+def ORYONI0 : ProcResGroup<[ORYONP0]> {
+ let BufferSize = 20;
+}
+
+// Crypto/CRC/PAU instructions on ports I2.
+def ORYONI2 : ProcResGroup<[ORYONP2]> {
+ let BufferSize = 20;
+}
+
+// Multiply/Multiply-ADD instructions on ports I4/I5.
+def ORYONI45 : ProcResGroup<[ORYONP4, ORYONP5]> {
+ let BufferSize = 40;
+}
+
+// Divide instructions on ports I5.
+def ORYONI5 : ProcResGroup<[ORYONP5]> {
+ let BufferSize = 20;
+}
+
+// Comparison instructions on ports I0/I1/I2/I3.
+def ORYONI0123 : ProcResGroup<[ORYONP0, ORYONP1,
+ ORYONP2, ORYONP3]> {
+ let BufferSize = 80;
+}
+
+// Load instructions on ports P6/P7/P8/P9.
+def ORYONLD : ProcResGroup<[ORYONP6, ORYONP7, ORYONP8, ORYONP9]> {
+ let BufferSize = 64;
+}
+
+// Store instructions on combo of STA/STD pipes
+def ORYONST : ProcResGroup<[ORYONST0, ORYONST1]> {
+ let BufferSize = 64;
+}
+
+// Arithmetic and CRYP-AED ASIMD/FP instructions on ports FP0/FP1/FP2/FP3.
+def ORYONFP0123 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1,
+ ORYONP14FP2, ORYONP15FP3]> {
+ let BufferSize = 192;
+}
+
+// FP Comparison and F/I move instructions on ports FP0/FP1.
+def ORYONFP01 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1]> {
+ let BufferSize = 96;
+}
+
+// FDIV instructions on ports FP3.
+def ORYONFP3 : ProcResGroup<[ORYONP15FP3]> {
+ let BufferSize = 48;
+}
+
+// CRYP-SHA instructions on ports FP1.
+def ORYONFP1 : ProcResGroup<[ORYONP14FP2]> {
+ let BufferSize = 48;
+}
+
+def ORYONFP2 : ProcResGroup<[ORYONP14FP2]> {
+ let BufferSize = 48;
+}
+
+// Reciprocal, Square root on FP0.
+def ORYONFP0 : ProcResGroup<[ORYONP12FP0]> {
+ let BufferSize = 48;
+}
+
+// cross IXU/LSU/VXU resource group for FMOV P41 of VXU
+// I2V
+def ORYONI2V : ProcResGroup<[ORYONI4FP0, ORYONI5FP1]> {
+ let BufferSize = 40;
+}
+
+// V2I
+def ORYONV2I : ProcResGroup<[ORYONFP0I4, ORYONFP1I5]> {
+ let BufferSize = 96;
+}
+
+// Define commonly used write types for InstRW specializations.
+// All definitions follow the format: ORYONWrite_<NumCycles>Cyc_<Resources>.
+
+// Because of the complexity of Oryon CPU, we skip the following
+// generic definitions and define each instruction specifically
+
+// These generic WriteRes entries are not used in the Oryon sched model;
+// all instructions are instead covered by the InstRW overrides below.
+def : WriteRes<WriteImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteI, []> { let Unsupported = 1; }
+def : WriteRes<WriteISReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteIEReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteExtr, []> { let Unsupported = 1; }
+def : WriteRes<WriteIS, []> { let Unsupported = 1; }
+def : WriteRes<WriteID32, []> { let Unsupported = 1; }
+def : WriteRes<WriteID64, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM32, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM64, []> { let Unsupported = 1; }
+def : WriteRes<WriteBr, []> { let Unsupported = 1; }
+def : WriteRes<WriteBrReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTP, []> { let Unsupported = 1; }
+def : WriteRes<WriteAdr, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteF, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCmp, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCvt, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
+def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
+def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
+def : WriteRes<WriteVd, []> { let Unsupported = 1; }
+def : WriteRes<WriteVq, []> { let Unsupported = 1; }
+def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteVST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSys, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Unsupported = 1; }
+def : WriteRes<WriteHint, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDHi, []> { let Unsupported = 1; }
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Placeholder ReadAdvance entries (0-cycle advance, i.e. no forwarding
+// benefit modeled); to be tuned in a later implementation.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
+
+
+// IXU resource definitions.
+// 1 cycle, no pipe resource consumed.
+def ORYONWrite_1Cyc_NONE : SchedWriteRes<[]>;
+
+// 1 cycle on I01.
+def ORYONWrite_1Cyc_I01 : SchedWriteRes<[ORYONI01]>;
+
+// 1 cycle, 2 micro-ops, on I01.
+def ORYONWrite_1Cyc_2Uops_I01 : SchedWriteRes<[ORYONI01]> {
+ let NumMicroOps = 2;
+}
+
+// 1 cycle on I0.
+def ORYONWrite_1Cyc_I0 : SchedWriteRes<[ORYONI0]>;
+
+// 7 cycles on I2. PAC*/AUT* instructions
+def ORYONWrite_7Cyc_I2 : SchedWriteRes<[ORYONI2]> {
+ let Latency = 7;
+}
+
+// 7 cycles, 3 micro-ops, on I2. PAC*/AUT* instructions
+def ORYONWrite_7Cyc_3Uops_I2 : SchedWriteRes<[ORYONI2]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// 9 (7+1+1) cycles on I2 and I0/I1, I0. Authentication branch instructions
+// these instructions are broken down to three uops
+// a. PtrAuth on pipe 2 taking 7 cycles
+// b. Link Register Update on pipes 0 and 1 taking 1 cycle
+// c. Indirect branch on pipe 0 taking 1 cycle
+
+def ORYONWrite_9Cyc_I012 : SchedWriteRes<[ORYONI2, ORYONI01]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+// 3 cycles on I2. CRC32 and CRC32C instructions
+def ORYONWrite_3Cyc_I2 : SchedWriteRes<[ORYONI2]> {
+ let Latency = 3;
+}
+
+// 1 cycle on I012345
+def ORYONWrite_1Cyc_I012345 : SchedWriteRes<[ORYONI012345]>;
+
+// 1 cycle on I0123
+def ORYONWrite_1Cyc_I0123 : SchedWriteRes<[ORYONI0123]>;
+
+// 1 cycle on 2 of I012345
+def ORYONWrite_1Cyc_I012345_I012345 :
+SchedWriteRes<[ORYONI012345, ORYONI012345]> ;
+
+// 2 cycle on 2 of I0123, each unit held for 2 cycles (ReleaseAtCycles)
+def ORYONWrite_2Cyc_I0123_I0123_RC :
+SchedWriteRes<[ORYONI0123, ORYONI0123]> {
+ let Latency = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+// 2 cycle on 2 of I012345, each unit held for 2 cycles
+def ORYONWrite_2Cyc_I012345_I012345_RC :
+SchedWriteRes<[ORYONI012345, ORYONI012345]> {
+ let Latency = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+// 3 cycle on 2 of I45, each unit held for 2 cycles
+def ORYONWrite_3Cyc_I45_I45_RC :
+SchedWriteRes<[ORYONI45, ORYONI45]> {
+ let Latency = 3;
+ let ReleaseAtCycles = [2,2];
+}
+
+// 3 cycle on I45
+def ORYONWrite_3Cyc_I45 : SchedWriteRes<[ORYONI45]> {
+ let Latency = 3;
+}
+
+// 7 cycle on I2, unit held 2 cycles; 32-bit integer division
+def ORYONWrite_7Cyc_I2_RC : SchedWriteRes<[ORYONI2]> {
+ let Latency = 7;
+ let ReleaseAtCycles = [2];
+}
+
+// 9 cycle on I2, unit held 2 cycles; 64-bit integer division
+def ORYONWrite_9Cyc_I2_RC : SchedWriteRes<[ORYONI2]> {
+ let Latency = 9;
+ let ReleaseAtCycles = [2];
+}
+
+// LSU resource definition
+// need to define WriteLDAdr, WriteAdrAdr, WriteLDHi, WriteSTX
+// 4 cycle on LS(P6789)
+def ORYONWrite_4Cyc_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 4;
+}
+
+// 4 cycle for Post/Pre inc/dec access, also covers all pair loads Post/Pre
+def ORYONWrite_4Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 4;
+}
+
+// 5 (4+1) for VXU SIMD access/could also include FP
+// resource might not be correct, as VXU resource not included
+def ORYONWrite_5Cyc_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+}
+
+def ORYONWrite_5Cyc_2Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_5Cyc_3Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_5Cyc_4Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def ORYONWrite_5Cyc_5Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+}
+
+def ORYONWrite_5Cyc_6Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+}
+
+def ORYONWrite_5Cyc_8Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+}
+
+def ORYONWrite_5Cyc_10Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 10;
+}
+
+// 5 cycle for SIMD/FP Post/Pre inc/dec access (writeback also uses the
+// integer ALU group).
+def ORYONWrite_5Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+}
+
+def ORYONWrite_5Cyc_2Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_5Cyc_3Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_5Cyc_4Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def ORYONWrite_5Cyc_5Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+}
+
+def ORYONWrite_5Cyc_6Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+}
+
+def ORYONWrite_5Cyc_8Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+}
+
+def ORYONWrite_5Cyc_10Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 10;
+}
+
+// 1 cycle for all generic stores
+def ORYONWrite_1Cyc_ST : SchedWriteRes<[ORYONST]>;
+
+def ORYONWrite_1Cyc_2Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_1Cyc_3Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_1Cyc_4Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 4;
+}
+
+def ORYONWrite_1Cyc_5Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 5;
+}
+
+def ORYONWrite_1Cyc_6Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 6;
+}
+
+def ORYONWrite_1Cyc_8Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 8;
+}
+
+def ORYONWrite_1Cyc_10Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 10;
+}
+
+// 1 cycle for neon write: float + ASIMD with Post/Pre Inc/Dec access
+// also includes Pair store until further informed
+// NOTE(review): this unnumbered variant has NumMicroOps = 3, duplicating
+// ORYONWrite_1Cyc_3Uops_ST_I012345 below — confirm whether 3 uops is the
+// intended default here.
+def ORYONWrite_1Cyc_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_1Cyc_2Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_1Cyc_3Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_1Cyc_4Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 4;
+}
+
+def ORYONWrite_1Cyc_5Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 5;
+}
+
+def ORYONWrite_1Cyc_6Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 6;
+}
+
+def ORYONWrite_1Cyc_8Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 8;
+}
+
+def ORYONWrite_1Cyc_10Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 10;
+}
+
+// VXU resource definition
+
+// I2V instruction has 1 uOp
+// I2v with convert has 2 uOps
+// all I2V, V2I's throughputs are 2
+// On VXU doc, p37 -- latencies and throughput
+// P41, resource taken, P42, uOps
+def ORYONWrite_I2V_4Cyc_I45 : SchedWriteRes<[ORYONI2V]> {
+ let Latency = 4;
+}
+
+// inline a FCVT, so add one more uOp
+def ORYONWrite_I2V_7Cyc_I45 : SchedWriteRes<[ORYONI2V]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+// V2I move instruction has 1/2 uOps, P42 in VXU doc
+// Latency is 3, FCVT is also 3 cycle
+// move + convert is 6 (3+3) cycles
+// throughput is 2
+def ORYONWrite_V2I_3Cyc_FP01 : SchedWriteRes<[ORYONV2I]> {
+ let Latency = 3;
+}
+
+// inline a FCVT, so add one more uOp
+def ORYONWrite_V2I_6Cyc_FP01 : SchedWriteRes<[ORYONV2I]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_V2V_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 2;
+}
+
+def ORYONWrite_V2V_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 3;
+}
+
+// NOTE(review): name says FP01 but the resource used is ORYONFP0123 —
+// confirm which group was intended.
+def ORYONWrite_V2V_6Cyc_FP01 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_4Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 4;
+}
+
+def ORYONWrite_3Cyc_FP0 : SchedWriteRes<[ORYONFP0]> {
+ let Latency = 3;
+}
+
+def ORYONWrite_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 3;
+}
+
+def ORYONWrite_3Cyc_2Uops_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 2;
+}
+
+def ORYONWrite_2Cyc_FP01 : SchedWriteRes<[ORYONFP01]> {
+ let Latency = 2;
+}
+
+// 2 cycle on FP1
+def ORYONWrite_2Cyc_FP1 : SchedWriteRes<[ORYONFP1]> {
+ let Latency = 2;
+}
+
+// 3 cycle on FP1
+def ORYONWrite_3Cyc_FP1 : SchedWriteRes<[ORYONFP1]> {
+ let Latency = 3;
+}
+
+// 4 cycle , 0.25 throughput (unit held 4 cycles) on FP1
+def ORYONWrite_4Cyc_FP1_RC4 : SchedWriteRes<[ORYONFP1]> {
+ let Latency = 4;
+ let ReleaseAtCycles = [4];
+}
+
+// 5 cycle , 1 throughput on FP1
+def ORYONWrite_5Cyc_FP1 : SchedWriteRes<[ORYONFP1]> {
+ let Latency = 5;
+}
+
+// 8 cycle , unit held 2 cycles on FP0123
+def ORYONWrite_8Cyc_FP0123_RC : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 8;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_6Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 6;
+}
+
+def ORYONWrite_7Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 7;
+}
+
+def ORYONWrite_8Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 8;
+}
+
+def ORYONWrite_9Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 9;
+}
+
+def ORYONWrite_10Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 10;
+}
+
+def ORYONWrite_8Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 8;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_10Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 10;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_13Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 13;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_4Cyc_FP0123_RC :
+SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 4;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_4Cyc_FP0123_FP0123_RC :
+SchedWriteRes<[ORYONFP0123, ORYONFP0123]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC :
+SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ReleaseAtCycles = [3,3,3];
+}
+
+def ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC :
+SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123, ORYONFP0123]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ReleaseAtCycles = [6,6,6,6];
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Tables in IXU
+//===----------------------------------------------------------------------===//
+
+//---
+// Arithmetic Instructions
+//---
+
+// NOTE(review): the short "a,b,c" comments below appear to encode
+// uops/latency/throughput triples from the Oryon IXU document — confirm.
+//1, 1, 6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^ADD(W|X)r(i|r|x)", "^SUB(W|X)r(i|r|x)")>;
+
+//2,2,3
+def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC],
+ (instregex "^ADD(W|X)rs", "^SUB(W|X)rs")>;
+
+//1,1,4 alias CMP, CMN on page 75
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^ADDS(W|X)r(i|r|x)(64)?", "^SUBS(W|X)r(i|r|x)")>;
+
+//2,2,2 alias CMP, CMN on page 75
+def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC],
+ (instregex "^ADDS(W|X)rs", "^SUBS(W|X)rs")>;
+
+//1,1,4
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^ADC(W|X)r","^SBC(W|X)r",
+ "^ADCS(W|X)r","^SBCS(W|X)r")>;
+
+//1,1,2
+def : InstRW<[ORYONWrite_1Cyc_2Uops_I01],
+ (instrs ADR,ADRP)>;
+
+//1,1,4
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^CSEL(W|X)r", "^CSINV(W|X)r",
+ "^CSNEG(W|X)r", "^CSINC(W|X)r")>;
+
+//---
+// Compare Instructions
+//---
+
+// We have CCMP, CCMN as LLVM DAG node
+// CMP is an alias of SUBS as above
+// CMN is an alias of ADDS as above
+// We also have no way to get shift compare node in LLVM
+//2,2,1.5 CMP, CMN
+
+//1,1,4
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^CCMP(W|X)(i|r)", "^CCMN(W|X)(i|r)")>;
+
+//---
+// Branch
+//---
+
+def : InstRW<[ORYONWrite_1Cyc_NONE], (instrs B)>;
+def : InstRW<[ORYONWrite_1Cyc_I01], (instrs BL)>;
+def : InstRW<[ORYONWrite_1Cyc_I01],
+ (instrs Bcc, CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
+def : InstRW<[ORYONWrite_1Cyc_I0], (instrs BR, BLR)>;
+def : InstRW<[ORYONWrite_1Cyc_I0], (instrs RET)>;
+
+// 3 uOp, 1 cycle for branch, 7 cycle for Authentication,
+// 1 cycle for updating link register
+// V8.3a PAC
+def : InstRW<[ORYONWrite_9Cyc_I012],
+ (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
+ BRAA, BRAAZ, BRAB, BRABZ)>;
+def : InstRW<[ORYONWrite_9Cyc_I012], (instrs RETAA, RETAB, ERETAA, ERETAB)>;
+
+def : InstRW<[ORYONWrite_7Cyc_3Uops_I2], (instregex "^LDRAA", "^LDRAB")>;
+
+//---
+// Logical Instructions
+//---
+
+//1,1,4 TST is an alias of ANDS
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^ANDS(W|X)r(i|r|x)", "^BICS(W|X)r(i|r|x)")>;
+
+//2,2,2 TST shift is an alias
+def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC],
+ (instregex "^ANDS(W|X)rs", "^BICS(W|X)rs")>;
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^AND(W|X)r(i|r|x)", "^EOR(W|X)r(i|r|x)",
+ "^ORR(W|X)r(i|r|x)", "^BIC(W|X)r(i|r|x)",
+ "^EON(W|X)r(i|r|x)", "^ORN(W|X)r(i|r|x)")>;
+
+//2,2,3
+def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC],
+ (instregex "^AND(W|X)rs", "^EOR(W|X)rs", "^ORR(W|X)rs",
+ "^BIC(W|X)rs", "^EON(W|X)rs", "^ORN(W|X)rs")>;
+
+
+//---
+// Shift Instructions
+//---
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^ASRV(W|X)r", "^LSLV(W|X)r",
+ "^LSRV(W|X)r", "^RORV(W|X)r",
+ "RMIF")>;
+
+//---
+// Move-Data Bit-field and Sign_Extension Instructions
+//---
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^MOVK(W|X)i", "^MOVN(W|X)i",
+ "^MOVZ(W|X)i", "^SBFM(W|X)ri",
+ "^UBFM(W|X)ri", "^BFM(W|X)ri",
+ "^SXT(W|B|H|X)", "^UXT(H|B)")>;
+
+// COPY instruction is an LLVM internal DAG node, needs further study
+def : InstRW<[ORYONWrite_1Cyc_I012345], (instrs COPY)>;
+
+//---
+// Reverse Instructions
+//---
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^RBIT(W|X)r", "^REV(16|32|64)?(W|X)r")>;
+
+
+//---
+// Flag Manipulate Instructions
+//---
+
+//1,1,4
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^SETF8", "^SETF16", "^CFINV")>;
+
+//---
+// Miscellaneous Instructions
+//---
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$", "^EXTR(W|X)rri")>;
+
+
+//---
+// Multiply Instructions
+//---
+
+//1,3,2
+def : InstRW<[ORYONWrite_3Cyc_I45],
+ (instregex "^MADD(W|X)rrr", "^MSUB(W|X)rrr",
+ "^(S|U)MADDLrrr", "^(S|U)MSUBLrrr",
+ "^(S|U)MULHrr")>;
+
+//---
+// Divide Instructions
+//---
+
+// 32-bit division: 7 cycles on I2, unit held 2 cycles.
+def : InstRW<[ORYONWrite_7Cyc_I2_RC],
+ (instregex "^(S|U)DIVWr")>;
+
+// 64-bit division: 9 cycles on I2, unit held 2 cycles.
+def : InstRW<[ORYONWrite_9Cyc_I2_RC],
+ (instregex "^(S|U)DIVXr")>;
+
+//---
+// Cryptography Instructions
+//
+//1,3,1 on I2
+def : InstRW<[ORYONWrite_3Cyc_I2],
+ (instregex "^CRC32(B|H|W|X)rr", "^CRC32C(B|H|W|X)rr")>;
+
+//---
+// PAU instructions
+//---
+
+// on p47 of IXU document, we have 7 cycles for all PAU instructions
+// here we just assume all signing and pauth instructions are 7 cycles
+// assume all are 7 cycles here
+
+// signing instructions
+def : InstRW<[ORYONWrite_7Cyc_I2], (instrs PACIA, PACIB,
+ PACDA, PACDB,
+ PACIZA, PACIZB,
+ PACDZA, PACDZB,
+ PACGA)>;
+// authentication instructions
+def : InstRW<[ORYONWrite_7Cyc_I2], (instrs AUTIA, AUTIB,
+ AUTDA, AUTDB,
+ AUTIZA, AUTIZB,
+ AUTDZA, AUTDZB)>;
+def : InstRW<[ORYONWrite_7Cyc_I2], (instrs XPACI, XPACD)>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Tables in LSU
+//===----------------------------------------------------------------------===//
+
+// 4 cycle Load-to-use from L1D$
+// Neon load with 5 cycle
+// 6 cycle to STA ?
+// STD cycle ?
+// NEON STD + 2
+
+// Load Instructions
+// FP Load Instructions
+
+// Load pair, immed pre-index, normal
+// Load pair, immed pre-index, signed words
+// Load pair, immed post-index, normal
+// Load pair, immed post-index, signed words
+// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
+
+// Non-temporal and normal pair loads: 4-cycle load-to-use.
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPDi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPQi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPSi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPXi)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPDi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPQi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPSi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPSWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPXi)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBui)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDui)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHui)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQui)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSui)>;
+
+// PC-relative (literal) loads.
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDl)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQl)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWl)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXl)>;
+
+// Unprivileged loads.
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRBi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRHi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRXi)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSBWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSBXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSHWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSHXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSWi)>;
+
+// Pre-indexed loads: address writeback also occupies the ALU group.
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPDpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPQpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPSpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPWpre)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRDpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRQpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRWpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRXpre)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBWpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBXpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBWpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBXpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHWpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHXpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHWpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHXpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBBpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBBpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHHpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHHpost)>;
+
+// Post-indexed pair loads.
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPDpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPQpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPSpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPWpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPXpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRDpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRQpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRWpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRXpost)>;
+
+// Register-offset loads.
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroW)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroX)>;
+
+// Unscaled-offset loads.
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBBi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURDi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHHi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURQi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSWi)>;
+
+
+// Store register, immed post-index
+// NOTE: Handled by WriteST, ReadAdrBase
+
+// Store register, immed pre-index
+// NOTE: Handled by WriteST
+
+// Store pair, immed post-index, W-form
+// Store pair, immed post-indx, X-form
+// Store pair, immed pre-index, W-form
+// Store pair, immed pre-index, X-form
+// NOTE: Handled by WriteSTP.
+
+// Unscaled-offset stores.
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBBi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURDi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHHi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURQi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURSi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURWi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURXi)>;
+
+// Unprivileged stores.
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRBi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRHi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRWi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRXi)>;
+
+// Non-temporal and normal pair stores.
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPDi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPQi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPXi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPWi)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPDi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPQi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPXi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPWi)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRBui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRDui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRHui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRQui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRXui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRWui)>;
+
+// Pre/post-indexed stores: address writeback also occupies the ALU group.
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STPXpre, STPXpost)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRXpre, STRXpost)>;
+
+// Register-offset stores.
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRXroW, STRXroX)>;
+
+// ASIMD Load instructions, 4 cycle access + 2 cycle NEON access
+// ASIMD load, 1 element, multiple, 1 reg, D-form 1uOps
+// ASIMD load, 1 element, multiple, 1 reg, Q-form 1uOps
+def : InstRW<[ORYONWrite_5Cyc_LD],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_LD_I012345],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form 3 uOps
+// ASIMD load, 1 element, multiple, 2 reg, Q-form 2 uOps
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
+ (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD],
+ (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
+ (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form 4 uOps
+// ASIMD load, 1 element, multiple, 3 reg, Q-form 3 uOps
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
+ (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
+ (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form 6 uOps
+// ASIMD load, 1 element, multiple, 4 reg, Q-form 4 uOps
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S 2uOps
+// ASIMD load, 1 element, one lane, D 2UOps
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
+ (instregex "^LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S 2uOps
+// ASIMD load, 1 element, all lanes, D-form, D 2uOps
+// ASIMD load, 1 element, all lanes, Q-form 2uOps
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S 3 uOps
+// ASIMD load, 2 element, multiple, Q-form, D 4 uOps
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
+ (instregex "^LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
+ (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H 3 uOps
+// ASIMD load, 2 element, one lane, S 3 uOps
+// ASIMD load, 2 element, one lane, D 3 uOps
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S 3 uOps
+// ASIMD load, 2 element, all lanes, D-form, D 3 uOps
+// ASIMD load, 2 element, all lanes, Q-form 3 uOps
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S 5 uOps
+// ASIMD load, 3 element, multiple, Q-form, B/H/S 6 uOps
+// ASIMD load, 3 element, multiple, Q-form, D 6 uOps
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
+ (instregex "^LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
+ (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H 4 uOps
+// ASIMD load, 3 element, one lane, S 4 uOps
+// ASIMD load, 3 element, one lane, D 5 uOps
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], (instregex "^LD3i(8|16|32)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD3i(64)$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD3i(8|16|32)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD3i(64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S 4 uOps
+// ASIMD load, 3 element, all lanes, D-form, D 5 uOps
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S 4 uOps
+// ASIMD load, 3 element, all lanes, Q-form, D 5 uOps
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
+ (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
+ (instregex "^LD3Rv(1d|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD3Rv(1d|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S 6 uOps
+// ASIMD load, 4 element, multiple, Q-form, B/H/S 10 uOps
+// ASIMD load, 4 element, multiple, Q-form, D 8 uOps
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
+ (instregex "^LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_10Uops_LD],
+ (instregex "^LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_8Uops_LD],
+ (instregex "^LD4Fourv(2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_10Uops_LD_I012345],
+ (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_8Uops_LD_I012345],
+ (instregex "^LD4Fourv(2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H 5 uOps
+// ASIMD load, 4 element, one lane, S 5 uOps
+// ASIMD load, 4 element, one lane, D 6 uOps
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], (instregex "^LD4i(64)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD4i(8|16|32)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD4i(64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S 5 uOps
+// ASIMD load, 4 element, all lanes, D-form, D 6 uOps
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S 5 uOps
+// ASIMD load, 4 element, all lanes, Q-form, D 6 uOps
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
+ (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
+ (instregex "^LD4Rv(1d|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD4Rv(1d|2d)_POST$")>;
+
+// ASIMD Store Instructions
+// ASIMD store, 1 element, multiple, 1 reg, D-form 1 uOps
+// ASIMD store, 1 element, multiple, 1 reg, Q-form 1 uops
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form 2 uOps
+// ASIMD store, 1 element, multiple, 2 reg, Q-form 2 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form 3 uOps
+// ASIMD store, 1 element, multiple, 3 reg, Q-form 3 uOps
+def : InstRW<[ORYONWrite_1Cyc_3Uops_ST],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form 4 uOps
+// ASIMD store, 1 element, multiple, 4 reg, Q-form 4 uOps
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S 2 uOps
+// ASIMD store, 1 element, one lane, D 2 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
+ (instregex "^ST1i(8|16|32|64)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S 2 uOps
+// ASIMD store, 2 element, multiple, Q-form, B/H/S 4 uOps
+// ASIMD store, 2 element, multiple, Q-form, D 4 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
+ (instregex "^ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
+ (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S 2 uOps
+// ASIMD store, 2 element, one lane, D 2 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
+ (instregex "^ST2i(8|16|32|64)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S 4 uOps
+// ASIMD store, 3 element, multiple, Q-form, B/H/S 6 uOps
+// ASIMD store, 3 element, multiple, Q-form, D 6 uOps
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
+ (instregex "^ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_1Cyc_6Uops_ST],
+ (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_6Uops_ST_I012345],
+ (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H 2 uOps
+// ASIMD store, 3 element, one lane, S 2 uOps
+// ASIMD store, 3 element, one lane, D 4 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], (instregex "^ST3i(8|16|32)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST3i(64)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST3i(8|16|32)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST3i(64)_POST$")>;
+
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S 5 uOps
+// ASIMD store, 4 element, multiple, Q-form, B/H/S 10 uOps
+// ASIMD store, 4 element, multiple, Q-form, D 8 uOps
+def : InstRW<[ORYONWrite_1Cyc_5Uops_ST],
+ (instregex "^ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_1Cyc_10Uops_ST],
+ (instregex "^ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[ORYONWrite_1Cyc_8Uops_ST],
+ (instregex "^ST4Fourv(2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_5Uops_ST_I012345],
+ (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_10Uops_ST_I012345],
+ (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_8Uops_ST_I012345],
+ (instregex "^ST4Fourv(2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H 3 uOps
+// ASIMD store, 4 element, one lane, S 3 uOps
+// ASIMD store, 4 element, one lane, D 4 uOps
+def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], (instregex "^ST4i(8|16|32)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST4i(64)$")>;
+def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345],
+ (instregex "^ST4i(8|16|32)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST4i(64)_POST$")>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Tables in VXU
+//===----------------------------------------------------------------------===//
+// all uOps are not clearly written in the VXU document
+
+// I2V
+def : InstRW<[ORYONWrite_I2V_4Cyc_I45], (instregex "^FMOV[HSD][WX]r", "^FMOVDXHighr")>;
+
+// I2V with convert
+def : InstRW<[ORYONWrite_I2V_7Cyc_I45], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>;
+
+// V2I
+def : InstRW<[ORYONWrite_V2I_3Cyc_FP01], (instregex "^FMOV[WX][HSD]r", "FMOVXDHighr")>;
+
+// V2I with convert 2nd [SU] necessary?
+def : InstRW<[ORYONWrite_V2I_6Cyc_FP01], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
+
+// float to float move immediate, row 7 in big chart
+def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]r")>;
+def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]i")>;
+
+// float to float conversion within VXU, precision conversion
+def : InstRW<[ORYONWrite_V2V_6Cyc_FP01], (instregex "^FJCVTZS")>;
+def : InstRW<[ORYONWrite_V2V_3Cyc_FP0123], (instregex "^FCVT[HSD][HSD]r",
+ "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
+
+// floating comparison write to NZCV
+def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCMP(E)?[HSD]r[ir]")>;
+def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCCMP(E)?[HSD]rr")>;
+
+// floating point conditional select
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FCSEL")>;
+
+// floating multiply-add
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^(F|FN)MADD", "^(F|FN)MSUB")>;
+
+// floating unary, cycle/throughput? xls row14
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^F(ABS|NEG)[SD]r")>;
+
+//floating division/square root
+def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVHrr")>;
+def : InstRW<[ORYONWrite_8Cyc_FP3], (instregex "^FDIVSrr")>;
+def : InstRW<[ORYONWrite_10Cyc_FP3], (instregex "^FDIVDrr")>;
+
+def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTHr")>;
+def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTSr")>;
+def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTDr")>;
+
+//==========
+// SIMD move instructions
+//==========
+
+// ASIMD DUP element
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^DUPv.+lane")>;
+// ASIMD DUP general throughput undecided, 3? FP0123
+// VXU doc, p42, 2 uOps
+def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^DUPv.+gpr")>;
+
+// ASIMD insert, element to element
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^INSv.+lane")>;
+// ASIMD insert, gen reg 3? FP0123?
+def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^INSv.+gpr")>;
+
+// ASIMD move, FP immed
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMOVv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^[SU]MOVv")>;
+
+//==========
+// SIMD arithmetic instructions
+//==========
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDv", "^SUBv",
+ "^BIFv", "^BITv", "^BSLv",
+ "^ANDv", "^BICv", "^EORv",
+ "^ORRv", "^ORNv")>;
+
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+
+// floating division
+def : InstRW<[ORYONWrite_6Cyc_FP3], (instregex "^FDIVv.*16$")>;
+def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVv.*32$")>;
+def : InstRW<[ORYONWrite_9Cyc_FP3], (instregex "^FDIVv.*64$")>;
+
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMUL(X)?v",
+ "^FRECPSv", "^FRSQRTSv")>;
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^MLAv","^MLSv", "^MULv",
+ "^PMULv", "UABAv")>;
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "SABAv", "SABDv",
+ "^(SH|UH)(ADD|SUB)v",
+ "^S(MAX|MIN)v",
+ "^(SQ|UQ)(ADD|SUB)v",
+ "^(SQ|SQR|UQ|UQR)SHLv",
+ "^(SR|UR)HADDv",
+ "^(SR|UR)SHLv",
+ "^UABDv",
+ "^U(MAX|MIN)v")>;
+// IMAX or UMAX in the above line
+//==========
+// SIMD compare instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^CMEQv","^CMGEv","^CMGTv",
+ "^CMLEv","^CMLTv", "^CMHIv",
+ "^CMHSv",
+ "^FCMEQv", "^FCMGEv",
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv",
+ "^FACGEv", "^FACGTv")>;
+
+//==========
+// SIMD widening and narrowing arithmetic instructions
+//==========
+// NO need to list ADDHN2, RADDHN2, RSUBHN2 as they are not distinguished
+// from ADDHN, RADDHN, RSUBHN in td file(v16i8, v8i16, v4i32).
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDHNv",
+ "^SUBHNv",
+ "^RADDHNv",
+ "^RSUBHNv",
+ "^SABD(L|L2)v", "^UABD(L|L2)v",
+ "^(S|U)(ADD|SUB)(L|L2|W|W2)v")>;
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^PMUL(L|L2)v","^SABA(L|L2)v",
+ "^(S|U|SQ)(MLA|MSL|MUL)(L|L2)v")>;
+
+//==========
+// SIMD unary arithmetic instructions
+//==========
+//^MVNv is an alias of ^NOTv
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ABSv", "^CLSv","^CLZv", "^CNTv",
+ "^NEGv", "^NOTv",
+ "^RBITv", "^REV(16|32|64)v",
+ "^SQ(ABS|NEG)v", "^SQ(XT|XTU)(N|N2)v",
+ "^(SU|US)QADDv",
+ "^UQXT(N|N2)v", "^XTN2?v")>;
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FCVT(L|L2|N|N2|XN|XN2)v",
+ "^FRINT[AIMNPXZ]v",
+ "^FRSQRTEv",
+ "^(S|U)ADALPv",
+ "^(S|U)ADDLPv")>;
+
+
+def : InstRW<[ORYONWrite_3Cyc_FP0], (instregex "^URECPEv", "^URSQRTEv",
+ "^FRECPEv", "^FRECPXv")>;
+
+def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTv.*16$")>;
+def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTv.*32$")>;
+def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTv.*64$")>;
+
+//==========
+// SIMD binary element arithmetic instructions
+//==========
+
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLAv", "^FMLSv")>;
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^SQDMULHv",
+ "^SQRD(MLA|MLS|MUL)Hv")>;
+
+//==========
+// SIMD permute instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^EXTv", "^TRN(1|2)v",
+ "^UZP(1|2)v", "^ZIP(1|2)v")>;
+
+//==========
+// SIMD immediate instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^MOVIv", "^MVNIv")>;
+
+//==========
+// SIMD shift(immediate) instructions
+//==========
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^RSHR(N|N2)v", "^SHLv",
+ "^(SHL|SHR)(N|N2)v",
+ "^SLIv",
+ "^(SQ|SQR)SHR(U)?(N|N2)v",
+ "^(UQ|UQR)SHR(N|N2)v",
+ "^SQSHLUv",
+ "^SRIv",
+ "^(S|SR|U|UR)SHRv",
+ "^(S|SR|U|UR)SRAv",
+ "^(S|U)SHL(L|L2)v")>;
+
+//==========
+// SIMD floating-point and integer conversion instructions
+//==========
+// same as above conversion
+
+//==========
+// SIMD reduce (across vector lanes) instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDVv",
+ "^(FMAX|FMIN)(V|NMV)v",
+ "^(S|U)ADDLVv",
+ "^(S|U)(MAX|MIN)Vv")>;
+//==========
+// SIMD pairwise arithmetic instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDPv", "^FADDPv",
+ "^(FMAX|FMIN)(NMP|P)v",
+ "^(S|U)(MIN|MAX)Pv")>;
+//==========
+// SIMD dot product instructions
+//==========
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(U|S)DOTv")>;
+
+//==========
+// SIMD table lookup instructions
+//==========
+// TBL 1-reg/2-reg; TBX 1-reg, 1uOp, throughput=4 latency=2
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instrs TBLv8i8One, TBLv16i8One,
+ TBXv8i8One, TBXv16i8One,
+ TBLv8i8Two, TBLv16i8Two)>;
+
+// TBL 3-reg/4-reg, 3uops, throughput=4/3=1.33 latency=4
+def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC],
+ (instrs TBLv8i8Three, TBLv16i8Three,
+ TBLv8i8Four, TBLv16i8Four)>;
+
+
+// TBX 2-reg 2 uOps, throughput=2 latency=4
+def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_RC], (instrs TBXv8i8Two, TBXv16i8Two)>;
+
+// TBX 3-reg/4-reg, 4uOps, throughput=1, latency=6
+def : InstRW<[ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC],
+ (instrs TBXv8i8Three, TBXv16i8Three,
+ TBXv8i8Four, TBXv16i8Four)>;
+
+
+//==========
+// SIMD complex number arithmetic instructions
+//==========
+
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FCADDv", "^FCMLAv")>;
+
+//==========
+// SIMD cryptographic instructions
+//==========
+// 3,4 on IMLA, CRYP
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^AES[DE]",
+ "^SM3(TT1|TT2)(A|B)")>;
+
+// 2,4 on CRYP
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^AESI?MC",
+ "^EOR3",
+ "^RAX1",
+ "^XAR",
+ "^BCAX",
+ "^SM3SS1",
+ "^SM3PART(W1|W2)")>;
+// 5,1 on CRYP
+def : InstRW<[ORYONWrite_5Cyc_FP1], (instregex "^SM4E",
+ "^SM4EKEY")>;
+
+// 2,1 on CRYP
+def : InstRW<[ORYONWrite_2Cyc_FP1], (instregex "^SHA1(H|SU0|SU1)",
+ "^SHA256SU0",
+ "^SHA512(SU0|SU1)")>;
+
+// 3,1 on CRYP
+def : InstRW<[ORYONWrite_3Cyc_FP1], (instregex "^SHA256SU1",
+ "^SHA512(H|H2)")>;
+
+// 4,0.25 on CRYP
+def : InstRW<[ORYONWrite_4Cyc_FP1_RC4], (instregex "^SHA1(C|P|M)",
+ "^SHA256(H|H2)")>;
+
+//==========
+// SIMD v8.6 instructions
+//==========
+// 4,2 on IMLA
+def : InstRW<[ORYONWrite_4Cyc_FP0123_RC], (instregex "^(S|U|US)MMLA$")>;
+
+// 4,0.5 on IMLA
+def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMMLA$")>;
+
+// 4,0.5 on IMLA
+def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMLAL(B|T)")>;
+
+// 3,4
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(US|SU)DOTv")>;
+
+// 3,1
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^BF(16)?DOTv")>;
+
+// 3,4
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^BFCVT(N|N2)?$")>;
+
+
+} // SchedModel = OryonModel
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 8bc26ee..93ea729 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -299,6 +299,13 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
PrefLoopAlignment = Align(64);
MaxInterleaveFactor = 4;
break;
+ case Oryon:
+ CacheLineSize = 64;
+ PrefFunctionAlignment = Align(16);
+ MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ break;
}
if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index f49c73d..9f5756f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -58,6 +58,9 @@ static cl::opt<unsigned> InlineCallPenaltyChangeSM(
static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
+ cl::init(true), cl::Hidden);
+
namespace {
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
@@ -4216,3 +4219,19 @@ bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
return true;
return BaseT::shouldTreatInstructionLikeSelect(I);
}
+
+bool AArch64TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) {
+ // AArch64 specific here is adding the number of instructions to the
+ // comparison (though not as the first consideration, as some targets do)
+ // along with changing the priority of the base additions.
+ // TODO: Maybe a more nuanced tradeoff between instruction count
+ // and number of registers? To be investigated at a later date.
+ if (EnableLSRCostOpt)
+ return std::tie(C1.NumRegs, C1.Insns, C1.NumBaseAdds, C1.AddRecCost,
+ C1.NumIVMuls, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+ std::tie(C2.NumRegs, C2.Insns, C2.NumBaseAdds, C2.AddRecCost,
+ C2.NumIVMuls, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+
+ return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
+} \ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 2f44aaa..feec1a4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -425,6 +425,9 @@ public:
}
std::optional<unsigned> getMinPageSize() const { return 4096; }
+
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 8e30278..d0d7a9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1534,6 +1534,12 @@ def FeatureISAVersion11_5_1 : FeatureSet<
FeatureVGPRSingleUseHintInsts,
Feature1_5xVGPRs])>;
+def FeatureISAVersion11_5_2 : FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureSALUFloatInsts,
+ FeatureDPPSrc1SGPR,
+ FeatureVGPRSingleUseHintInsts])>;
+
def FeatureISAVersion12 : FeatureSet<
[FeatureGFX12,
FeatureLDSBankCount32,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 625ac02..2bdbf41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -1017,7 +1017,7 @@ public:
//
// TODO: We could filter out subgraphs that do not access LDS globals.
for (Function *F : KernelsThatAllocateTableLDS)
- removeFnAttrFromReachable(CG, F, "amdgpu-no-lds-kernel-id");
+ removeFnAttrFromReachable(CG, F, {"amdgpu-no-lds-kernel-id"});
}
DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 9c94ca1..17c9615 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -57,6 +57,7 @@
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
@@ -992,6 +993,10 @@ void AMDGPUPassConfig::addIRPasses() {
if (isPassEnabled(EnableImageIntrinsicOptimizer))
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
+ // This can be disabled by passing ::Disable here or on the command line
+ // with --expand-variadics-override=disable.
+ addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
+
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
addPass(createAlwaysInlinerLegacyPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 2ada981..d218ffe 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -295,7 +295,11 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel,
FeatureISAVersion11_5_1.Features
>;
-// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
+def : ProcessorModel<"gfx1152", GFX11SpeedModel,
+ FeatureISAVersion11_5_2.Features
+>;
+
+// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152]
def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
FeatureISAVersion11_Generic.Features
>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index d7d6e00..e805e96 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -113,6 +113,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
@@ -196,6 +197,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
+ case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index c47eea2..8b42d4a 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2052,9 +2052,6 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
MemInfoMap &Visited,
SmallPtrSet<MachineInstr *, 4> &AnchorList) const {
- if (!(MI.mayLoad() ^ MI.mayStore()))
- return false;
-
if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
return false;
@@ -2065,10 +2062,6 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
unsigned AS = SIInstrInfo::isFLATGlobal(MI) ? AMDGPUAS::GLOBAL_ADDRESS
: AMDGPUAS::FLAT_ADDRESS;
- if (MI.mayLoad() &&
- TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
- return false;
-
if (AnchorList.count(&MI))
return false;
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index f178324..5dc3457 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -103,8 +103,6 @@ private:
MachineBasicBlock *emitEndCf(MachineInstr &MI);
- void lowerInitExec(MachineBasicBlock *MBB, MachineInstr &MI);
-
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const;
@@ -709,95 +707,6 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
return SplitBB;
}
-void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
- MachineInstr &MI) {
- MachineFunction &MF = *MBB->getParent();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- bool IsWave32 = ST.isWave32();
-
- if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
- // This should be before all vector instructions.
- MachineInstr *InitMI = BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
- TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
- .addImm(MI.getOperand(0).getImm());
- if (LIS) {
- LIS->RemoveMachineInstrFromMaps(MI);
- LIS->InsertMachineInstrInMaps(*InitMI);
- }
- MI.eraseFromParent();
- return;
- }
-
- // Extract the thread count from an SGPR input and set EXEC accordingly.
- // Since BFM can't shift by 64, handle that case with CMP + CMOV.
- //
- // S_BFE_U32 count, input, {shift, 7}
- // S_BFM_B64 exec, count, 0
- // S_CMP_EQ_U32 count, 64
- // S_CMOV_B64 exec, -1
- Register InputReg = MI.getOperand(0).getReg();
- MachineInstr *FirstMI = &*MBB->begin();
- if (InputReg.isVirtual()) {
- MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
- assert(DefInstr && DefInstr->isCopy());
- if (DefInstr->getParent() == MBB) {
- if (DefInstr != FirstMI) {
- // If the `InputReg` is defined in current block, we also need to
- // move that instruction to the beginning of the block.
- DefInstr->removeFromParent();
- MBB->insert(FirstMI, DefInstr);
- if (LIS)
- LIS->handleMove(*DefInstr);
- } else {
- // If first instruction is definition then move pointer after it.
- FirstMI = &*std::next(FirstMI->getIterator());
- }
- }
- }
-
- // Insert instruction sequence at block beginning (before vector operations).
- const DebugLoc DL = MI.getDebugLoc();
- const unsigned WavefrontSize = ST.getWavefrontSize();
- const unsigned Mask = (WavefrontSize << 1) - 1;
- Register CountReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
- .addReg(InputReg)
- .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
- if (LV)
- LV->recomputeForSingleDefVirtReg(InputReg);
- auto BfmMI =
- BuildMI(*MBB, FirstMI, DL,
- TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
- .addReg(CountReg)
- .addImm(0);
- auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
- .addReg(CountReg, RegState::Kill)
- .addImm(WavefrontSize);
- if (LV)
- LV->getVarInfo(CountReg).Kills.push_back(CmpMI);
- auto CmovMI =
- BuildMI(*MBB, FirstMI, DL,
- TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
- Exec)
- .addImm(-1);
-
- if (!LIS) {
- MI.eraseFromParent();
- return;
- }
-
- LIS->RemoveMachineInstrFromMaps(MI);
- MI.eraseFromParent();
-
- LIS->InsertMachineInstrInMaps(*BfeMI);
- LIS->InsertMachineInstrInMaps(*BfmMI);
- LIS->InsertMachineInstrInMaps(*CmpMI);
- LIS->InsertMachineInstrInMaps(*CmovMI);
-
- RecomputeRegs.insert(InputReg);
- LIS->createAndComputeVirtRegInterval(CountReg);
-}
-
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
for (auto &I : MBB.instrs()) {
if (!I.isDebugInstr() && !I.isUnconditionalBranch())
@@ -927,18 +836,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
SplitMBB = process(MI);
Changed = true;
break;
-
- // FIXME: find a better place for this
- case AMDGPU::SI_INIT_EXEC:
- case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
- lowerInitExec(MBB, MI);
- if (LIS)
- LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
- Changed = true;
- break;
-
- default:
- break;
}
if (SplitMBB != MBB) {
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 09dc1c7..5b4c443 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -177,6 +177,7 @@ private:
SmallVector<MachineInstr *, 4> LowerToMovInstrs;
SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
SmallVector<MachineInstr *, 4> KillInstrs;
+ SmallVector<MachineInstr *, 4> InitExecInstrs;
void printInfo();
@@ -223,6 +224,8 @@ private:
void lowerLiveMaskQueries();
void lowerCopyInstrs();
void lowerKillInstrs(bool IsWQM);
+ void lowerInitExec(MachineInstr &MI);
+ void lowerInitExecInstrs();
public:
static char ID;
@@ -580,6 +583,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
Opcode == AMDGPU::SI_DEMOTE_I1) {
KillInstrs.push_back(&MI);
BBI.NeedsLowering = true;
+ } else if (Opcode == AMDGPU::SI_INIT_EXEC ||
+ Opcode == AMDGPU::SI_INIT_EXEC_FROM_INPUT) {
+ InitExecInstrs.push_back(&MI);
} else if (WQMOutputs) {
// The function is in machine SSA form, which means that physical
// VGPRs correspond to shader inputs and outputs. Inputs are
@@ -1556,6 +1562,97 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
}
}
+void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
+ MachineBasicBlock *MBB = MI.getParent();
+ bool IsWave32 = ST->isWave32();
+
+ if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
+ // This should be before all vector instructions.
+ MachineInstr *InitMI =
+ BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
+ TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
+ Exec)
+ .addImm(MI.getOperand(0).getImm());
+ if (LIS) {
+ LIS->RemoveMachineInstrFromMaps(MI);
+ LIS->InsertMachineInstrInMaps(*InitMI);
+ }
+ MI.eraseFromParent();
+ return;
+ }
+
+ // Extract the thread count from an SGPR input and set EXEC accordingly.
+ // Since BFM can't shift by 64, handle that case with CMP + CMOV.
+ //
+ // S_BFE_U32 count, input, {shift, 7}
+ // S_BFM_B64 exec, count, 0
+ // S_CMP_EQ_U32 count, 64
+ // S_CMOV_B64 exec, -1
+ Register InputReg = MI.getOperand(0).getReg();
+ MachineInstr *FirstMI = &*MBB->begin();
+ if (InputReg.isVirtual()) {
+ MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
+ assert(DefInstr && DefInstr->isCopy());
+ if (DefInstr->getParent() == MBB) {
+ if (DefInstr != FirstMI) {
+ // If the `InputReg` is defined in current block, we also need to
+ // move that instruction to the beginning of the block.
+ DefInstr->removeFromParent();
+ MBB->insert(FirstMI, DefInstr);
+ if (LIS)
+ LIS->handleMove(*DefInstr);
+ } else {
+ // If first instruction is definition then move pointer after it.
+ FirstMI = &*std::next(FirstMI->getIterator());
+ }
+ }
+ }
+
+ // Insert instruction sequence at block beginning (before vector operations).
+ const DebugLoc DL = MI.getDebugLoc();
+ const unsigned WavefrontSize = ST->getWavefrontSize();
+ const unsigned Mask = (WavefrontSize << 1) - 1;
+ Register CountReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
+ .addReg(InputReg)
+ .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
+ auto BfmMI =
+ BuildMI(*MBB, FirstMI, DL,
+ TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
+ .addReg(CountReg)
+ .addImm(0);
+ auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
+ .addReg(CountReg, RegState::Kill)
+ .addImm(WavefrontSize);
+ auto CmovMI =
+ BuildMI(*MBB, FirstMI, DL,
+ TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
+ Exec)
+ .addImm(-1);
+
+ if (!LIS) {
+ MI.eraseFromParent();
+ return;
+ }
+
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+
+ LIS->InsertMachineInstrInMaps(*BfeMI);
+ LIS->InsertMachineInstrInMaps(*BfmMI);
+ LIS->InsertMachineInstrInMaps(*CmpMI);
+ LIS->InsertMachineInstrInMaps(*CmovMI);
+
+ LIS->removeInterval(InputReg);
+ LIS->createAndComputeVirtRegInterval(InputReg);
+ LIS->createAndComputeVirtRegInterval(CountReg);
+}
+
+void SIWholeQuadMode::lowerInitExecInstrs() {
+ for (MachineInstr *MI : InitExecInstrs)
+ lowerInitExec(*MI);
+}
+
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
<< " ------------- \n");
@@ -1567,6 +1664,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LowerToCopyInstrs.clear();
LowerToMovInstrs.clear();
KillInstrs.clear();
+ InitExecInstrs.clear();
StateTransition.clear();
ST = &MF.getSubtarget<GCNSubtarget>();
@@ -1606,10 +1704,13 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
// Shader is simple does not need any state changes or any complex lowering
if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
LowerToMovInstrs.empty() && KillInstrs.empty()) {
+ lowerInitExecInstrs();
lowerLiveMaskQueries();
- return !LiveMaskQueries.empty();
+ return !InitExecInstrs.empty() || !LiveMaskQueries.empty();
}
+ lowerInitExecInstrs();
+
MachineBasicBlock &Entry = MF.front();
MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 239e0ee..04c6e94 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -235,8 +235,9 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
}
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
- StringRef FnAttr) {
- KernelRoot->removeFnAttr(FnAttr);
+ ArrayRef<StringRef> FnAttrs) {
+ for (StringRef Attr : FnAttrs)
+ KernelRoot->removeFnAttr(Attr);
SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
SmallPtrSet<Function *, 8> Visited;
@@ -261,12 +262,15 @@ void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
Function *PotentialCallee =
ExternalCallRecord.second->getFunction();
assert(PotentialCallee);
- if (!isKernelLDS(PotentialCallee))
- PotentialCallee->removeFnAttr(FnAttr);
+ if (!isKernelLDS(PotentialCallee)) {
+ for (StringRef Attr : FnAttrs)
+ PotentialCallee->removeFnAttr(Attr);
+ }
}
}
} else {
- Callee->removeFnAttr(FnAttr);
+ for (StringRef Attr : FnAttrs)
+ Callee->removeFnAttr(Attr);
if (Visited.insert(Callee).second)
WorkList.push_back(Callee);
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index 4d3ad32..e1cd4d0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -54,7 +55,7 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M);
/// Strip FnAttr attribute from any functions where we may have
/// introduced its use.
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
- StringRef FnAttr);
+ ArrayRef<StringRef> FnAttrs);
/// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
/// if this is actually a memory update or an artificial clobber to facilitate
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index a46c383..9198287 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -115,6 +115,12 @@ static bool shouldInspect(MachineInstr &MI) {
return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
}
+static bool isHorizontalReduction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t Flags = MCID.TSFlags;
+ return (Flags & ARMII::HorizontalReduction) != 0;
+}
+
namespace {
using InstSet = SmallPtrSetImpl<MachineInstr *>;
@@ -275,6 +281,16 @@ namespace {
if (VPT->getOpcode() == ARM::MVE_VPST)
return false;
+ // If the VPT block does not define something that is an "output", then
+ // the tail-predicated version will just perform a subset of the original
+ // vpt block, where the last lanes should not be used.
+ if (isVPTOpcode(VPT->getOpcode()) &&
+ all_of(Block.getInsts(), [](const MachineInstr *MI) {
+ return !MI->mayStore() && !MI->mayLoad() &&
+ !isHorizontalReduction(*MI) && !isVCTP(MI);
+ }))
+ return true;
+
auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
@@ -813,12 +829,6 @@ static bool producesDoubleWidthResult(const MachineInstr &MI) {
return (Flags & ARMII::DoubleWidthResult) != 0;
}
-static bool isHorizontalReduction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- uint64_t Flags = MCID.TSFlags;
- return (Flags & ARMII::HorizontalReduction) != 0;
-}
-
// Can this instruction generate a non-zero result when given only zeroed
// operands? This allows us to know that, given operands with false bytes
// zeroed by masked loads, that the result will also contain zeros in those
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 51384f2..9d7e463 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -171,6 +171,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
// Set operations for 'F' feature.
if (Subtarget.hasBasicF()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
@@ -186,6 +188,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f32, Legal);
@@ -202,7 +206,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
// Set operations for 'D' feature.
if (Subtarget.hasBasicD()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
@@ -219,6 +225,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f64, Legal);
@@ -5004,6 +5012,10 @@ bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
+bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
+ return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
+}
+
bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
// TODO: Support vectors.
if (Y.getValueType().isVector())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index f274b19..9328831 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -229,6 +229,7 @@ public:
bool isLegalAddImmediate(int64_t Imm) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+ bool signExtendConstant(const ConstantInt *CI) const override;
bool hasAndNotCompare(SDValue Y) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 83466d5..c29c1b5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -46,7 +46,7 @@ static cl::opt<bool>
static std::string computeDataLayout(const Triple &TT) {
if (TT.isArch64Bit())
- return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
+ return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported");
return "e-m:e-p:32:32-i64:64-n32-S128";
}
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 5eefab5..b0cb24c 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -40,7 +40,7 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMLegacyPass();
ModulePass *createNVPTXCtorDtorLoweringLegacyPass();
-FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
+FunctionPass *createNVVMIntrRangePass();
FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
@@ -53,12 +53,7 @@ MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
- NVVMIntrRangePass();
- NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
- unsigned SmVersion;
};
struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> {
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index f636979..82770f8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -542,30 +542,24 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If the NVVM IR has some of reqntid* specified, then output
// the reqntid directive, and set the unspecified ones to 1.
// If none of Reqntid* is specified, don't output reqntid directive.
- unsigned Reqntidx, Reqntidy, Reqntidz;
- Reqntidx = Reqntidy = Reqntidz = 1;
- bool ReqSpecified = false;
- ReqSpecified |= getReqNTIDx(F, Reqntidx);
- ReqSpecified |= getReqNTIDy(F, Reqntidy);
- ReqSpecified |= getReqNTIDz(F, Reqntidz);
+ std::optional<unsigned> Reqntidx = getReqNTIDx(F);
+ std::optional<unsigned> Reqntidy = getReqNTIDy(F);
+ std::optional<unsigned> Reqntidz = getReqNTIDz(F);
- if (ReqSpecified)
- O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
- << "\n";
+ if (Reqntidx || Reqntidy || Reqntidz)
+ O << ".reqntid " << Reqntidx.value_or(1) << ", " << Reqntidy.value_or(1)
+ << ", " << Reqntidz.value_or(1) << "\n";
// If the NVVM IR has some of maxntid* specified, then output
// the maxntid directive, and set the unspecified ones to 1.
// If none of maxntid* is specified, don't output maxntid directive.
- unsigned Maxntidx, Maxntidy, Maxntidz;
- Maxntidx = Maxntidy = Maxntidz = 1;
- bool MaxSpecified = false;
- MaxSpecified |= getMaxNTIDx(F, Maxntidx);
- MaxSpecified |= getMaxNTIDy(F, Maxntidy);
- MaxSpecified |= getMaxNTIDz(F, Maxntidz);
-
- if (MaxSpecified)
- O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
- << "\n";
+ std::optional<unsigned> Maxntidx = getMaxNTIDx(F);
+ std::optional<unsigned> Maxntidy = getMaxNTIDy(F);
+ std::optional<unsigned> Maxntidz = getMaxNTIDz(F);
+
+ if (Maxntidx || Maxntidy || Maxntidz)
+ O << ".maxntid " << Maxntidx.value_or(1) << ", " << Maxntidy.value_or(1)
+ << ", " << Maxntidz.value_or(1) << "\n";
unsigned Mincta = 0;
if (getMinCTASm(F, Mincta))
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 4dc3cea..b60a1d7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -233,9 +233,9 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
- // FIXME: NVVMIntrRangePass is causing numerical discrepancies,
- // investigate and re-enable.
- // FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion()));
+ // Note: NVVMIntrRangePass was causing numerical discrepancies at one
+ // point; if issues crop up, consider disabling.
+ FPM.addPass(NVVMIntrRangePass());
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 013afe9..3a536db 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -128,6 +128,14 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
return true;
}
+static std::optional<unsigned>
+findOneNVVMAnnotation(const GlobalValue &GV, const std::string &PropName) {
+ unsigned RetVal;
+ if (findOneNVVMAnnotation(&GV, PropName, RetVal))
+ return RetVal;
+ return std::nullopt;
+}
+
bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
std::vector<unsigned> &retval) {
auto &AC = getAnnotationCache();
@@ -252,32 +260,57 @@ std::string getSamplerName(const Value &val) {
return std::string(val.getName());
}
-bool getMaxNTIDx(const Function &F, unsigned &x) {
- return findOneNVVMAnnotation(&F, "maxntidx", x);
+std::optional<unsigned> getMaxNTIDx(const Function &F) {
+ return findOneNVVMAnnotation(F, "maxntidx");
}
-bool getMaxNTIDy(const Function &F, unsigned &y) {
- return findOneNVVMAnnotation(&F, "maxntidy", y);
+std::optional<unsigned> getMaxNTIDy(const Function &F) {
+ return findOneNVVMAnnotation(F, "maxntidy");
}
-bool getMaxNTIDz(const Function &F, unsigned &z) {
- return findOneNVVMAnnotation(&F, "maxntidz", z);
+std::optional<unsigned> getMaxNTIDz(const Function &F) {
+ return findOneNVVMAnnotation(F, "maxntidz");
+}
+
+std::optional<unsigned> getMaxNTID(const Function &F) {
+ // Note: The semantics here are a bit strange. The PTX ISA states the
+ // following (11.4.2. Performance-Tuning Directives: .maxntid):
+ //
+ // Note that this directive guarantees that the total number of threads does
+ // not exceed the maximum, but does not guarantee that the limit in any
+ // particular dimension is not exceeded.
+ std::optional<unsigned> MaxNTIDx = getMaxNTIDx(F);
+ std::optional<unsigned> MaxNTIDy = getMaxNTIDy(F);
+ std::optional<unsigned> MaxNTIDz = getMaxNTIDz(F);
+ if (MaxNTIDx || MaxNTIDy || MaxNTIDz)
+ return MaxNTIDx.value_or(1) * MaxNTIDy.value_or(1) * MaxNTIDz.value_or(1);
+ return std::nullopt;
}
bool getMaxClusterRank(const Function &F, unsigned &x) {
return findOneNVVMAnnotation(&F, "maxclusterrank", x);
}
-bool getReqNTIDx(const Function &F, unsigned &x) {
- return findOneNVVMAnnotation(&F, "reqntidx", x);
+std::optional<unsigned> getReqNTIDx(const Function &F) {
+ return findOneNVVMAnnotation(F, "reqntidx");
+}
+
+std::optional<unsigned> getReqNTIDy(const Function &F) {
+ return findOneNVVMAnnotation(F, "reqntidy");
}
-bool getReqNTIDy(const Function &F, unsigned &y) {
- return findOneNVVMAnnotation(&F, "reqntidy", y);
+std::optional<unsigned> getReqNTIDz(const Function &F) {
+ return findOneNVVMAnnotation(F, "reqntidz");
}
-bool getReqNTIDz(const Function &F, unsigned &z) {
- return findOneNVVMAnnotation(&F, "reqntidz", z);
+std::optional<unsigned> getReqNTID(const Function &F) {
+ // Note: The semantics here are a bit strange. See getMaxNTID.
+ std::optional<unsigned> ReqNTIDx = getReqNTIDx(F);
+ std::optional<unsigned> ReqNTIDy = getReqNTIDy(F);
+ std::optional<unsigned> ReqNTIDz = getReqNTIDz(F);
+ if (ReqNTIDx || ReqNTIDy || ReqNTIDz)
+ return ReqNTIDx.value_or(1) * ReqNTIDy.value_or(1) * ReqNTIDz.value_or(1);
+ return std::nullopt;
}
bool getMinCTASm(const Function &F, unsigned &x) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index 2872db9..e020bc0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -48,13 +48,15 @@ std::string getTextureName(const Value &);
std::string getSurfaceName(const Value &);
std::string getSamplerName(const Value &);
-bool getMaxNTIDx(const Function &, unsigned &);
-bool getMaxNTIDy(const Function &, unsigned &);
-bool getMaxNTIDz(const Function &, unsigned &);
-
-bool getReqNTIDx(const Function &, unsigned &);
-bool getReqNTIDy(const Function &, unsigned &);
-bool getReqNTIDz(const Function &, unsigned &);
+std::optional<unsigned> getMaxNTIDx(const Function &);
+std::optional<unsigned> getMaxNTIDy(const Function &);
+std::optional<unsigned> getMaxNTIDz(const Function &);
+std::optional<unsigned> getMaxNTID(const Function &F);
+
+std::optional<unsigned> getReqNTIDx(const Function &);
+std::optional<unsigned> getReqNTIDy(const Function &);
+std::optional<unsigned> getReqNTIDz(const Function &);
+std::optional<unsigned> getReqNTID(const Function &);
bool getMaxClusterRank(const Function &, unsigned &);
bool getMinCTASm(const Function &, unsigned &);
diff --git a/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp b/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
index 5381646..f9d21b3 100644
--- a/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -1,4 +1,4 @@
-//===- NVVMIntrRange.cpp - Set !range metadata for NVVM intrinsics --------===//
+//===- NVVMIntrRange.cpp - Set range attributes for NVVM intrinsics -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,19 +6,21 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass adds appropriate !range metadata for calls to NVVM
+// This pass adds appropriate range attributes for calls to NVVM
// intrinsics that return a limited range of values.
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
-#include "llvm/IR/Constants.h"
+#include "NVPTXUtilities.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
+#include <cstdint>
using namespace llvm;
@@ -26,31 +28,20 @@ using namespace llvm;
namespace llvm { void initializeNVVMIntrRangePass(PassRegistry &); }
-// Add !range metadata based on limits of given SM variant.
-static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20),
- cl::Hidden, cl::desc("SM variant"));
-
namespace {
class NVVMIntrRange : public FunctionPass {
- private:
- unsigned SmVersion;
-
- public:
- static char ID;
- NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {}
- NVVMIntrRange(unsigned int SmVersion)
- : FunctionPass(ID), SmVersion(SmVersion) {
+public:
+ static char ID;
+ NVVMIntrRange() : FunctionPass(ID) {
- initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
- }
+ initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &) override;
+ bool runOnFunction(Function &) override;
};
-}
+} // namespace
-FunctionPass *llvm::createNVVMIntrRangePass(unsigned int SmVersion) {
- return new NVVMIntrRange(SmVersion);
-}
+FunctionPass *llvm::createNVVMIntrRangePass() { return new NVVMIntrRange(); }
char NVVMIntrRange::ID = 0;
INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range",
@@ -58,112 +49,110 @@ INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range",
// Adds the passed-in [Low,High) range information as metadata to the
// passed-in call instruction.
-static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
- // This call already has range metadata, nothing to do.
- if (C->getMetadata(LLVMContext::MD_range))
+static bool addRangeAttr(uint64_t Low, uint64_t High, IntrinsicInst *II) {
+ if (II->getMetadata(LLVMContext::MD_range))
return false;
- LLVMContext &Context = C->getParent()->getContext();
- IntegerType *Int32Ty = Type::getInt32Ty(Context);
- Metadata *LowAndHigh[] = {
- ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Low)),
- ConstantAsMetadata::get(ConstantInt::get(Int32Ty, High))};
- C->setMetadata(LLVMContext::MD_range, MDNode::get(Context, LowAndHigh));
+ const uint64_t BitWidth = II->getType()->getIntegerBitWidth();
+ ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High));
+
+ if (auto CurrentRange = II->getRange())
+ Range = Range.intersectWith(CurrentRange.value());
+
+ II->addRangeRetAttr(Range);
return true;
}
-static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
+static bool runNVVMIntrRange(Function &F) {
struct {
unsigned x, y, z;
} MaxBlockSize, MaxGridSize;
- MaxBlockSize.x = 1024;
- MaxBlockSize.y = 1024;
- MaxBlockSize.z = 64;
- MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
+ const unsigned MetadataNTID = getReqNTID(F).value_or(
+ getMaxNTID(F).value_or(std::numeric_limits<unsigned>::max()));
+
+ MaxBlockSize.x = std::min(1024u, MetadataNTID);
+ MaxBlockSize.y = std::min(1024u, MetadataNTID);
+ MaxBlockSize.z = std::min(64u, MetadataNTID);
+
+ MaxGridSize.x = 0x7fffffff;
MaxGridSize.y = 0xffff;
MaxGridSize.z = 0xffff;
// Go through the calls in this function.
bool Changed = false;
for (Instruction &I : instructions(F)) {
- CallInst *Call = dyn_cast<CallInst>(&I);
- if (!Call)
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
continue;
- if (Function *Callee = Call->getCalledFunction()) {
- switch (Callee->getIntrinsicID()) {
- // Index within block
- case Intrinsic::nvvm_read_ptx_sreg_tid_x:
- Changed |= addRangeMetadata(0, MaxBlockSize.x, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_tid_y:
- Changed |= addRangeMetadata(0, MaxBlockSize.y, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_tid_z:
- Changed |= addRangeMetadata(0, MaxBlockSize.z, Call);
- break;
-
- // Block size
- case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
- Changed |= addRangeMetadata(1, MaxBlockSize.x+1, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
- Changed |= addRangeMetadata(1, MaxBlockSize.y+1, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
- Changed |= addRangeMetadata(1, MaxBlockSize.z+1, Call);
- break;
-
- // Index within grid
- case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
- Changed |= addRangeMetadata(0, MaxGridSize.x, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
- Changed |= addRangeMetadata(0, MaxGridSize.y, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
- Changed |= addRangeMetadata(0, MaxGridSize.z, Call);
- break;
-
- // Grid size
- case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
- Changed |= addRangeMetadata(1, MaxGridSize.x+1, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
- Changed |= addRangeMetadata(1, MaxGridSize.y+1, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
- Changed |= addRangeMetadata(1, MaxGridSize.z+1, Call);
- break;
-
- // warp size is constant 32.
- case Intrinsic::nvvm_read_ptx_sreg_warpsize:
- Changed |= addRangeMetadata(32, 32+1, Call);
- break;
-
- // Lane ID is [0..warpsize)
- case Intrinsic::nvvm_read_ptx_sreg_laneid:
- Changed |= addRangeMetadata(0, 32, Call);
- break;
-
- default:
- break;
- }
+ switch (II->getIntrinsicID()) {
+ // Index within block
+ case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+ Changed |= addRangeAttr(0, MaxBlockSize.x, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+ Changed |= addRangeAttr(0, MaxBlockSize.y, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+ Changed |= addRangeAttr(0, MaxBlockSize.z, II);
+ break;
+
+ // Block size
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
+ Changed |= addRangeAttr(1, MaxBlockSize.x + 1, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
+ Changed |= addRangeAttr(1, MaxBlockSize.y + 1, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
+ Changed |= addRangeAttr(1, MaxBlockSize.z + 1, II);
+ break;
+
+ // Index within grid
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
+ Changed |= addRangeAttr(0, MaxGridSize.x, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
+ Changed |= addRangeAttr(0, MaxGridSize.y, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
+ Changed |= addRangeAttr(0, MaxGridSize.z, II);
+ break;
+
+ // Grid size
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
+ Changed |= addRangeAttr(1, MaxGridSize.x + 1, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
+ Changed |= addRangeAttr(1, MaxGridSize.y + 1, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
+ Changed |= addRangeAttr(1, MaxGridSize.z + 1, II);
+ break;
+
+ // warp size is constant 32.
+ case Intrinsic::nvvm_read_ptx_sreg_warpsize:
+ Changed |= addRangeAttr(32, 32 + 1, II);
+ break;
+
+ // Lane ID is [0..warpsize)
+ case Intrinsic::nvvm_read_ptx_sreg_laneid:
+ Changed |= addRangeAttr(0, 32, II);
+ break;
+
+ default:
+ break;
}
}
return Changed;
}
-bool NVVMIntrRange::runOnFunction(Function &F) {
- return runNVVMIntrRange(F, SmVersion);
-}
-
-NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {}
+bool NVVMIntrRange::runOnFunction(Function &F) { return runNVVMIntrRange(F); }
PreservedAnalyses NVVMIntrRangePass::run(Function &F,
FunctionAnalysisManager &AM) {
- return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
+ return runNVVMIntrRange(F) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
}
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f4e84ad..bc0ae7a 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1079,13 +1079,13 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
assert(IsAIX && TM.getCodeModel() == CodeModel::Small &&
"PseudoOp only valid for small code model AIX");
- // Transform %rN = ADDItoc/8 @op1, %r2.
+ // Transform %rN = ADDItoc/8 %r2, @op1.
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to load address.
TmpInst.setOpcode((!IsPPC64) ? (PPC::LA) : (PPC::LA8));
- const MachineOperand &MO = MI->getOperand(1);
+ const MachineOperand &MO = MI->getOperand(2);
assert(MO.isGlobal() && "Invalid operand for ADDItoc[8].");
// Map the operand to its corresponding MCSymbol.
@@ -1094,7 +1094,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_None, OutContext);
- TmpInst.getOperand(1) = TmpInst.getOperand(2);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 7350506..a07954bd 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -2080,13 +2080,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
cast<GlobalVariable>(GV)->hasAttribute("toc-data");
// For small code model, generate a simple TOC load.
- if (CModel == CodeModel::Small)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
- IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc),
- DestReg)
- .addGlobalAddress(GV)
- .addReg(PPC::X2);
- else {
+ if (CModel == CodeModel::Small) {
+ auto MIB = BuildMI(
+ *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
+ if (IsAIXTocData)
+ MIB.addReg(PPC::X2).addGlobalAddress(GV);
+ else
+ MIB.addGlobalAddress(GV).addReg(PPC::X2);
+ } else {
// If the address is an externally defined symbol, a symbol with common
// or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 275b333..1a69d1e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -6102,8 +6102,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
EVT OperandTy) {
SDValue GA = TocEntry->getOperand(0);
SDValue TocBase = TocEntry->getOperand(1);
- SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
- transferMemOperands(TocEntry, MN);
+ SDNode *MN = nullptr;
+ if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
+ // toc-data access doesn't involve loading from the GOT, so there is no
+ // need to keep memory operands.
+ MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, TocBase, GA);
+ else {
+ MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
+ transferMemOperands(TocEntry, MN);
+ }
ReplaceNode(TocEntry, MN);
};
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 9af8ada7..eda5eb9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1485,11 +1485,9 @@ def ADDItocL8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:
}
// Local Data Transform
-def ADDItoc8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+def ADDItoc8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItoc8",
- [(set i64:$rD,
- (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-
+ []>, isPPC64;
let mayLoad = 1 in
def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index df6b2bf..09f8299 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3345,10 +3345,8 @@ def LWZtocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc_nor
def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp),
"#ADDIStocHA", []>;
// TOC Data Transform on AIX
-def ADDItoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
- "#ADDItoc",
- [(set i32:$rD,
- (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def ADDItoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tocentry32:$disp),
+ "#ADDItoc", []>;
def ADDItocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp),
"#ADDItocL", []>;
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index a967682..82358cd 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -932,11 +932,11 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
"Can't handle X0, X0 vsetvli yet");
if (AVLReg == RISCV::X0)
NewInfo.setAVLVLMAX();
- else if (VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS))
- NewInfo.setAVLRegDef(VNI, AVLReg);
- else {
- assert(MI.getOperand(1).isUndef());
+ else if (MI.getOperand(1).isUndef())
NewInfo.setAVLIgnored();
+ else {
+ VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
+ NewInfo.setAVLRegDef(VNI, AVLReg);
}
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
@@ -1008,11 +1008,11 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
}
else
InstrInfo.setAVLImm(Imm);
- } else if (VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS)) {
- InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
- } else {
- assert(VLOp.isUndef());
+ } else if (VLOp.isUndef()) {
InstrInfo.setAVLIgnored();
+ } else {
+ VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
+ InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
}
} else {
assert(isScalarExtractInstr(MI));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 6d926ce..b0949f5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1033,6 +1033,22 @@ class VPseudoUnaryNoMask<DAGOperand RetClass,
let HasVecPolicyOp = 1;
}
+class VPseudoUnaryNoMaskNoPolicy<DAGOperand RetClass,
+ DAGOperand OpClass,
+ string Constraint = "",
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Constraint;
+ let TargetOverlapConstraintType = TargetConstraintType;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+}
+
class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass,
DAGOperand OpClass,
string Constraint = "",
@@ -1422,24 +1438,6 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass,
let UsesVXRM = 0;
}
-// Like VPseudoBinaryNoMask, but output can be V0.
-class VPseudoBinaryMOutNoMask<VReg RetClass,
- VReg Op1Class,
- DAGOperand Op2Class,
- string Constraint,
- int TargetConstraintType = 1> :
- Pseudo<(outs RetClass:$rd),
- (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let Constraints = Constraint;
- let TargetOverlapConstraintType = TargetConstraintType;
- let HasVLOp = 1;
- let HasSEWOp = 1;
-}
-
// Like VPseudoBinaryMask, but output can be V0.
class VPseudoBinaryMOutMask<VReg RetClass,
RegisterClass Op1Class,
@@ -2056,9 +2054,10 @@ multiclass VPseudoVSFS_M {
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
let VLMul = mti.LMul.value in {
- def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>,
+ def "_M_" # mti.BX : VPseudoUnaryNoMaskNoPolicy<VR, VR, constraint>,
SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx,
forceMergeOpRead=true>;
+ let ForceTailAgnostic = true in
def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>,
SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx,
forceMergeOpRead=true>;
@@ -2172,8 +2171,8 @@ multiclass VPseudoBinaryM<VReg RetClass,
int TargetConstraintType = 1,
bit Commutable = 0> {
let VLMul = MInfo.value, isCommutable = Commutable in {
- def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class,
- Constraint, TargetConstraintType>;
+ def "_" # MInfo.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
+ Constraint, TargetConstraintType>;
let ForceTailAgnostic = true in
def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMOutMask<RetClass, Op1Class,
Op2Class, Constraint, TargetConstraintType>,
@@ -4078,9 +4077,8 @@ class VPatMaskUnaryNoMask<string intrinsic_name,
(mti.Mask VR:$rs2),
VLOpFrag)),
(!cast<Instruction>(inst#"_M_"#mti.BX)
- (mti.Mask (IMPLICIT_DEF)),
(mti.Mask VR:$rs2),
- GPR:$vl, mti.Log2SEW, TA_MA)>;
+ GPR:$vl, mti.Log2SEW)>;
class VPatMaskUnaryMask<string intrinsic_name,
string inst,
@@ -4153,27 +4151,6 @@ class VPatBinaryNoMaskTU<string intrinsic_name,
(op2_type op2_kind:$rs2),
GPR:$vl, sew, TU_MU)>;
-class VPatBinaryNoMaskRoundingMode<string intrinsic_name,
- string inst,
- ValueType result_type,
- ValueType op1_type,
- ValueType op2_type,
- int sew,
- VReg op1_reg_class,
- DAGOperand op2_kind> :
- Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
- (result_type (undef)),
- (op1_type op1_reg_class:$rs1),
- (op2_type op2_kind:$rs2),
- (XLenVT timm:$round),
- VLOpFrag)),
- (!cast<Instruction>(inst)
- (result_type (IMPLICIT_DEF)),
- (op1_type op1_reg_class:$rs1),
- (op2_type op2_kind:$rs2),
- (XLenVT timm:$round),
- GPR:$vl, sew, TA_MA)>;
-
class VPatBinaryNoMaskTURoundingMode<string intrinsic_name,
string inst,
ValueType result_type,
@@ -4827,8 +4804,6 @@ multiclass VPatBinaryRoundingMode<string intrinsic,
VReg result_reg_class,
VReg op1_reg_class,
DAGOperand op2_kind> {
- def : VPatBinaryNoMaskRoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
- sew, op1_reg_class, op2_kind>;
def : VPatBinaryNoMaskTURoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
sew, result_reg_class, op1_reg_class, op2_kind>;
def : VPatBinaryMaskTARoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
@@ -6962,12 +6937,12 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors,
foreach vti = AllIntegerVectors in {
// Emit shift by 1 as an add since it might be faster.
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef),
+ def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(XLenVT 1), VLOpFrag)),
(!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
- vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
+ vti.RegClass:$merge, vti.RegClass:$rs1,
+ vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(XLenVT 1),
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 956b851..49838e6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1459,11 +1459,22 @@ static bool generateImageSizeQueryInst(const SPIRV::IncomingCall *Call,
Component == 3 ? NumActualRetComponents - 1 : Component;
assert(ExtractedComposite < NumActualRetComponents &&
"Invalid composite index!");
+ Register TypeReg = GR->getSPIRVTypeID(Call->ReturnType);
+ SPIRVType *NewType = nullptr;
+ if (QueryResultType->getOpcode() == SPIRV::OpTypeVector) {
+ Register NewTypeReg = QueryResultType->getOperand(1).getReg();
+ if (TypeReg != NewTypeReg &&
+ (NewType = GR->getSPIRVTypeForVReg(NewTypeReg)) != nullptr)
+ TypeReg = NewTypeReg;
+ }
MIRBuilder.buildInstr(SPIRV::OpCompositeExtract)
.addDef(Call->ReturnRegister)
- .addUse(GR->getSPIRVTypeID(Call->ReturnType))
+ .addUse(TypeReg)
.addUse(QueryResult)
.addImm(ExtractedComposite);
+ if (NewType != nullptr)
+ insertAssignInstr(Call->ReturnRegister, nullptr, NewType, GR, MIRBuilder,
+ MIRBuilder.getMF().getRegInfo());
} else {
// More than 1 component is expected, fill a new vector.
auto MIB = MIRBuilder.buildInstr(SPIRV::OpVectorShuffle)
@@ -2063,16 +2074,30 @@ static bool generateAsyncCopy(const SPIRV::IncomingCall *Call,
auto Scope = buildConstantIntReg(SPIRV::Scope::Workgroup, MIRBuilder, GR);
switch (Opcode) {
- case SPIRV::OpGroupAsyncCopy:
- return MIRBuilder.buildInstr(Opcode)
- .addDef(Call->ReturnRegister)
- .addUse(GR->getSPIRVTypeID(Call->ReturnType))
- .addUse(Scope)
- .addUse(Call->Arguments[0])
- .addUse(Call->Arguments[1])
- .addUse(Call->Arguments[2])
- .addUse(buildConstantIntReg(1, MIRBuilder, GR))
- .addUse(Call->Arguments[3]);
+ case SPIRV::OpGroupAsyncCopy: {
+ SPIRVType *NewType =
+ Call->ReturnType->getOpcode() == SPIRV::OpTypeEvent
+ ? nullptr
+ : GR->getOrCreateSPIRVTypeByName("spirv.Event", MIRBuilder);
+ Register TypeReg = GR->getSPIRVTypeID(NewType ? NewType : Call->ReturnType);
+ unsigned NumArgs = Call->Arguments.size();
+ Register EventReg = Call->Arguments[NumArgs - 1];
+ bool Res = MIRBuilder.buildInstr(Opcode)
+ .addDef(Call->ReturnRegister)
+ .addUse(TypeReg)
+ .addUse(Scope)
+ .addUse(Call->Arguments[0])
+ .addUse(Call->Arguments[1])
+ .addUse(Call->Arguments[2])
+ .addUse(Call->Arguments.size() > 4
+ ? Call->Arguments[3]
+ : buildConstantIntReg(1, MIRBuilder, GR))
+ .addUse(EventReg);
+ if (NewType != nullptr)
+ insertAssignInstr(Call->ReturnRegister, nullptr, NewType, GR, MIRBuilder,
+ MIRBuilder.getMF().getRegInfo());
+ return Res;
+ }
case SPIRV::OpGroupWaitEvents:
return MIRBuilder.buildInstr(Opcode)
.addUse(Scope)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 24c6c26..edc9e1a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -586,6 +586,7 @@ defm : DemangledNativeBuiltin<"__spirv_SpecConstantComposite", OpenCL_std, SpecC
// Async Copy and Prefetch builtin records:
defm : DemangledNativeBuiltin<"async_work_group_copy", OpenCL_std, AsyncCopy, 4, 4, OpGroupAsyncCopy>;
+defm : DemangledNativeBuiltin<"async_work_group_strided_copy", OpenCL_std, AsyncCopy, 5, 5, OpGroupAsyncCopy>;
defm : DemangledNativeBuiltin<"__spirv_GroupAsyncCopy", OpenCL_std, AsyncCopy, 6, 6, OpGroupAsyncCopy>;
defm : DemangledNativeBuiltin<"wait_group_events", OpenCL_std, AsyncCopy, 2, 2, OpGroupWaitEvents>;
defm : DemangledNativeBuiltin<"__spirv_GroupWaitEvents", OpenCL_std, AsyncCopy, 3, 3, OpGroupWaitEvents>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 5ef0be1..bbd25dc 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -61,9 +61,6 @@ class SPIRVEmitIntrinsics
DenseMap<Instruction *, Type *> AggrConstTypes;
DenseSet<Instruction *> AggrStores;
- // a registry of created Intrinsic::spv_assign_ptr_type instructions
- DenseMap<Value *, CallInst *> AssignPtrTypeInstr;
-
// deduce element type of untyped pointers
Type *deduceElementType(Value *I);
Type *deduceElementTypeHelper(Value *I);
@@ -98,14 +95,16 @@ class SPIRVEmitIntrinsics
return B.CreateIntrinsic(IntrID, {Types}, Args);
}
+ void buildAssignType(IRBuilder<> &B, Type *ElemTy, Value *Arg);
void buildAssignPtr(IRBuilder<> &B, Type *ElemTy, Value *Arg);
+ void updateAssignType(CallInst *AssignCI, Value *Arg, Value *OfType);
void replaceMemInstrUses(Instruction *Old, Instruction *New, IRBuilder<> &B);
void processInstrAfterVisit(Instruction *I, IRBuilder<> &B);
void insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B);
void insertAssignTypeIntrs(Instruction *I, IRBuilder<> &B);
- void insertAssignTypeInstrForTargetExtTypes(TargetExtType *AssignedType,
- Value *V, IRBuilder<> &B);
+ void insertAssignPtrTypeTargetExt(TargetExtType *AssignedType, Value *V,
+ IRBuilder<> &B);
void replacePointerOperandWithPtrCast(Instruction *I, Value *Pointer,
Type *ExpectedElementType,
unsigned OperandToReplace,
@@ -218,15 +217,39 @@ static inline void reportFatalOnTokenType(const Instruction *I) {
false);
}
+void SPIRVEmitIntrinsics::buildAssignType(IRBuilder<> &B, Type *Ty,
+ Value *Arg) {
+ Value *OfType = PoisonValue::get(Ty);
+ CallInst *AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type,
+ {Arg->getType()}, OfType, Arg, {}, B);
+ GR->addAssignPtrTypeInstr(Arg, AssignCI);
+}
+
void SPIRVEmitIntrinsics::buildAssignPtr(IRBuilder<> &B, Type *ElemTy,
Value *Arg) {
- CallInst *AssignPtrTyCI =
- buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {Arg->getType()},
- Constant::getNullValue(ElemTy), Arg,
- {B.getInt32(getPointerAddressSpace(Arg->getType()))}, B);
+ Value *OfType = PoisonValue::get(ElemTy);
+ CallInst *AssignPtrTyCI = buildIntrWithMD(
+ Intrinsic::spv_assign_ptr_type, {Arg->getType()}, OfType, Arg,
+ {B.getInt32(getPointerAddressSpace(Arg->getType()))}, B);
GR->addDeducedElementType(AssignPtrTyCI, ElemTy);
GR->addDeducedElementType(Arg, ElemTy);
- AssignPtrTypeInstr[Arg] = AssignPtrTyCI;
+ GR->addAssignPtrTypeInstr(Arg, AssignPtrTyCI);
+}
+
+void SPIRVEmitIntrinsics::updateAssignType(CallInst *AssignCI, Value *Arg,
+ Value *OfType) {
+ LLVMContext &Ctx = Arg->getContext();
+ AssignCI->setArgOperand(
+ 1, MetadataAsValue::get(
+ Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(OfType))));
+ if (cast<IntrinsicInst>(AssignCI)->getIntrinsicID() !=
+ Intrinsic::spv_assign_ptr_type)
+ return;
+
+ // update association with the pointee type
+ Type *ElemTy = OfType->getType();
+ GR->addDeducedElementType(AssignCI, ElemTy);
+ GR->addDeducedElementType(Arg, ElemTy);
}
// Set element pointer type to the given value of ValueTy and tries to
@@ -513,19 +536,16 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
if (!Ty) {
GR->addDeducedElementType(Op, KnownElemTy);
// check if there is existing Intrinsic::spv_assign_ptr_type instruction
- auto It = AssignPtrTypeInstr.find(Op);
- if (It == AssignPtrTypeInstr.end()) {
+ CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op);
+ if (AssignCI == nullptr) {
Instruction *User = dyn_cast<Instruction>(Op->use_begin()->get());
setInsertPointSkippingPhis(B, User ? User->getNextNode() : I);
CallInst *CI =
buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {OpTy}, OpTyVal, Op,
{B.getInt32(getPointerAddressSpace(OpTy))}, B);
- AssignPtrTypeInstr[Op] = CI;
+ GR->addAssignPtrTypeInstr(Op, CI);
} else {
- It->second->setArgOperand(
- 1,
- MetadataAsValue::get(
- Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(OpTyVal))));
+ updateAssignType(AssignCI, Op, OpTyVal);
}
} else {
if (auto *OpI = dyn_cast<Instruction>(Op)) {
@@ -559,7 +579,9 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
if (isAssignTypeInstr(U)) {
B.SetInsertPoint(U);
SmallVector<Value *, 2> Args = {New, U->getOperand(1)};
- B.CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args);
+ CallInst *AssignCI =
+ B.CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args);
+ GR->addAssignPtrTypeInstr(New, AssignCI);
U->eraseFromParent();
} else if (isMemInstrToReplace(U) || isa<ReturnInst>(U) ||
isa<CallInst>(U)) {
@@ -751,33 +773,39 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
return NewI;
}
-void SPIRVEmitIntrinsics::insertAssignTypeInstrForTargetExtTypes(
+void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
TargetExtType *AssignedType, Value *V, IRBuilder<> &B) {
- // Do not emit spv_assign_type if the V is of the AssignedType already.
- if (V->getType() == AssignedType)
- return;
+ Type *VTy = V->getType();
- // Do not emit spv_assign_type if there is one already targetting V. If the
- // found spv_assign_type assigns a type different than AssignedType, report an
- // error. Builtin types cannot be redeclared or casted.
- for (auto User : V->users()) {
- auto *II = dyn_cast<IntrinsicInst>(User);
- if (!II || II->getIntrinsicID() != Intrinsic::spv_assign_type)
- continue;
+ // A couple of sanity checks.
+ assert(isPointerTy(VTy) && "Expect a pointer type!");
+ if (auto PType = dyn_cast<TypedPointerType>(VTy))
+ if (PType->getElementType() != AssignedType)
+ report_fatal_error("Unexpected pointer element type!");
- MetadataAsValue *VMD = cast<MetadataAsValue>(II->getOperand(1));
- Type *BuiltinType =
- dyn_cast<ConstantAsMetadata>(VMD->getMetadata())->getType();
- if (BuiltinType != AssignedType)
- report_fatal_error("Type mismatch " + BuiltinType->getTargetExtName() +
- "/" + AssignedType->getTargetExtName() +
- " for value " + V->getName(),
- false);
+ CallInst *AssignCI = GR->findAssignPtrTypeInstr(V);
+ if (!AssignCI) {
+ buildAssignType(B, AssignedType, V);
return;
}
- Constant *Const = UndefValue::get(AssignedType);
- buildIntrWithMD(Intrinsic::spv_assign_type, {V->getType()}, Const, V, {}, B);
+ Type *CurrentType =
+ dyn_cast<ConstantAsMetadata>(
+ cast<MetadataAsValue>(AssignCI->getOperand(1))->getMetadata())
+ ->getType();
+ if (CurrentType == AssignedType)
+ return;
+
+ // Builtin types cannot be redeclared or casted.
+ if (CurrentType->isTargetExtTy())
+ report_fatal_error("Type mismatch " + CurrentType->getTargetExtName() +
+ "/" + AssignedType->getTargetExtName() +
+ " for value " + V->getName(),
+ false);
+
+ // Our previous guess about the type seems to be wrong, let's update
+ // inferred type according to a new, more precise type information.
+ updateAssignType(AssignCI, V, PoisonValue::get(AssignedType));
}
void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
@@ -850,7 +878,7 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
ExpectedElementTypeConst, Pointer, {B.getInt32(AddressSpace)}, B);
GR->addDeducedElementType(CI, ExpectedElementType);
GR->addDeducedElementType(Pointer, ExpectedElementType);
- AssignPtrTypeInstr[Pointer] = CI;
+ GR->addAssignPtrTypeInstr(Pointer, CI);
return;
}
@@ -929,8 +957,7 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
for (unsigned OpIdx = 0; OpIdx < CI->arg_size(); OpIdx++) {
Value *ArgOperand = CI->getArgOperand(OpIdx);
- if (!isa<PointerType>(ArgOperand->getType()) &&
- !isa<TypedPointerType>(ArgOperand->getType()))
+ if (!isPointerTy(ArgOperand->getType()))
continue;
// Constants (nulls/undefs) are handled in insertAssignPtrTypeIntrs()
@@ -952,8 +979,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
continue;
if (ExpectedType->isTargetExtTy())
- insertAssignTypeInstrForTargetExtTypes(cast<TargetExtType>(ExpectedType),
- ArgOperand, B);
+ insertAssignPtrTypeTargetExt(cast<TargetExtType>(ExpectedType),
+ ArgOperand, B);
else
replacePointerOperandWithPtrCast(CI, ArgOperand, ExpectedType, OpIdx, B);
}
@@ -1145,7 +1172,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I,
CallInst *CI = buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()},
EltTyConst, I, {B.getInt32(AddressSpace)}, B);
GR->addDeducedElementType(CI, ElemTy);
- AssignPtrTypeInstr[I] = CI;
+ GR->addAssignPtrTypeInstr(I, CI);
}
void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
@@ -1164,20 +1191,32 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
TypeToAssign = It->second;
}
}
- Constant *Const = UndefValue::get(TypeToAssign);
- buildIntrWithMD(Intrinsic::spv_assign_type, {Ty}, Const, I, {}, B);
+ buildAssignType(B, TypeToAssign, I);
}
for (const auto &Op : I->operands()) {
if (isa<ConstantPointerNull>(Op) || isa<UndefValue>(Op) ||
// Check GetElementPtrConstantExpr case.
(isa<ConstantExpr>(Op) && isa<GEPOperator>(Op))) {
setInsertPointSkippingPhis(B, I);
- if (isa<UndefValue>(Op) && Op->getType()->isAggregateType())
- buildIntrWithMD(Intrinsic::spv_assign_type, {B.getInt32Ty()}, Op,
- UndefValue::get(B.getInt32Ty()), {}, B);
- else if (!isa<Instruction>(Op))
- buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op, {},
- B);
+ Type *OpTy = Op->getType();
+ if (isa<UndefValue>(Op) && OpTy->isAggregateType()) {
+ CallInst *AssignCI =
+ buildIntrWithMD(Intrinsic::spv_assign_type, {B.getInt32Ty()}, Op,
+ UndefValue::get(B.getInt32Ty()), {}, B);
+ GR->addAssignPtrTypeInstr(Op, AssignCI);
+ } else if (!isa<Instruction>(Op)) {
+ Type *OpTy = Op->getType();
+ if (auto PType = dyn_cast<TypedPointerType>(OpTy)) {
+ buildAssignPtr(B, PType->getElementType(), Op);
+ } else if (isPointerTy(OpTy)) {
+ Type *ElemTy = GR->findDeducedElementType(Op);
+ buildAssignPtr(B, ElemTy ? ElemTy : deduceElementType(Op), Op);
+ } else {
+ CallInst *AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type,
+ {OpTy}, Op, Op, {}, B);
+ GR->addAssignPtrTypeInstr(Op, AssignCI);
+ }
+ }
}
}
}
@@ -1368,14 +1407,12 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
continue;
insertAssignPtrTypeIntrs(I, B);
+ deduceOperandElementType(I);
insertAssignTypeIntrs(I, B);
insertPtrCastOrAssignTypeInstr(I, B);
insertSpirvDecorations(I, B);
}
- for (auto &I : instructions(Func))
- deduceOperandElementType(&I);
-
for (auto *I : Worklist) {
TrackConstants = true;
if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index ef0973d..db01f68 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -73,8 +73,11 @@ class SPIRVGlobalRegistry {
// untyped pointers.
DenseMap<Value *, Type *> DeducedElTys;
// Maps composite values to deduced types where untyped pointers are replaced
- // with typed ones
+ // with typed ones.
DenseMap<Value *, Type *> DeducedNestedTys;
+ // Maps values to "assign type" calls, thus being a registry of created
+ // Intrinsic::spv_assign_ptr_type instructions.
+ DenseMap<Value *, CallInst *> AssignPtrTypeInstr;
// Add a new OpTypeXXX instruction without checking for duplicates.
SPIRVType *createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
@@ -149,6 +152,17 @@ public:
return It == FunResPointerTypes.end() ? nullptr : It->second;
}
+ // A registry of "assign type" records:
+ // - Add a record.
+ void addAssignPtrTypeInstr(Value *Val, CallInst *AssignPtrTyCI) {
+ AssignPtrTypeInstr[Val] = AssignPtrTyCI;
+ }
+ // - Find a record.
+ CallInst *findAssignPtrTypeInstr(const Value *Val) {
+ auto It = AssignPtrTypeInstr.find(Val);
+ return It == AssignPtrTypeInstr.end() ? nullptr : It->second;
+ }
+
// Deduced element types of untyped pointers and composites:
// - Add a record to the map of deduced element types.
void addDeducedElementType(Value *Val, Type *Ty) { DeducedElTys[Val] = Ty; }
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index 3d53608..a0a253c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -417,7 +417,8 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Expecting an instruction that defines the register");
// G_GLOBAL_VALUE already has type info.
- if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE)
+ if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE &&
+ Def->getOpcode() != SPIRV::ASSIGN_TYPE)
insertAssignInstr(Reg, nullptr, AssignedPtrType, GR, MIB,
MF.getRegInfo());
ToErase.push_back(&MI);
@@ -427,7 +428,8 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Expecting an instruction that defines the register");
// G_GLOBAL_VALUE already has type info.
- if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE)
+ if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE &&
+ Def->getOpcode() != SPIRV::ASSIGN_TYPE)
insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MF.getRegInfo());
ToErase.push_back(&MI);
} else if (MIOp == TargetOpcode::G_CONSTANT ||
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 8e20631..f5bc584 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -178,14 +178,15 @@ static wasm::WasmLimits DefaultLimits() {
}
static MCSymbolWasm *GetOrCreateFunctionTableSymbol(MCContext &Ctx,
- const StringRef &Name) {
+ const StringRef &Name,
+ bool is64) {
MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
if (Sym) {
if (!Sym->isFunctionTable())
Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
} else {
Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
- Sym->setFunctionTable();
+ Sym->setFunctionTable(is64);
// The default function table is synthesized by the linker.
Sym->setUndefined();
}
@@ -258,7 +259,7 @@ public:
MCAsmParserExtension::Initialize(Parser);
DefaultFunctionTable = GetOrCreateFunctionTableSymbol(
- getContext(), "__indirect_function_table");
+ getContext(), "__indirect_function_table", is64);
if (!STI->checkFeatures("+reference-types"))
DefaultFunctionTable->setOmitFromLinkingSection();
}
@@ -508,7 +509,7 @@ public:
auto &Tok = Lexer.getTok();
if (Tok.is(AsmToken::Identifier)) {
auto *Sym =
- GetOrCreateFunctionTableSymbol(getContext(), Tok.getString());
+ GetOrCreateFunctionTableSymbol(getContext(), Tok.getString(), is64);
const auto *Val = MCSymbolRefExpr::create(Sym, getContext());
*Op = std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Symbol, Tok.getLoc(), Tok.getEndLoc(),
@@ -836,6 +837,9 @@ public:
// symbol
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
+ if (is64) {
+ Limits.Flags |= wasm::WASM_LIMITS_FLAG_IS_64;
+ }
wasm::WasmTableType Type = {*ElemType, Limits};
WasmSym->setTableType(Type);
TOut.emitTableType(WasmSym);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 5e727980..c5a047e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -108,8 +108,9 @@ MCSymbolWasm *WebAssembly::getOrCreateFunctionTableSymbol(
if (!Sym->isFunctionTable())
Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
} else {
+ bool is64 = Subtarget && Subtarget->getTargetTriple().isArch64Bit();
Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
- Sym->setFunctionTable();
+ Sym->setFunctionTable(is64);
// The default function table is synthesized by the linker.
Sym->setUndefined();
}
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0bf3294..3933e82 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5120,6 +5120,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_tileloaddt164_internal: {
if (!Subtarget->hasAMXTILE())
break;
+ auto *MFI =
+ CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
? X86::PTILELOADDV
: X86::PTILELOADDT1V;
@@ -5201,6 +5204,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
case Intrinsic::x86_tilestored64_internal: {
+ auto *MFI =
+ CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
unsigned Opc = X86::PTILESTOREDV;
// _tile_stored_internal(row, col, buf, STRIDE, c)
SDValue Base = Node->getOperand(4);
@@ -5228,6 +5234,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_tilestored64: {
if (!Subtarget->hasAMXTILE())
break;
+ auto *MFI =
+ CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7d30de1..3fbab3a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -615,6 +615,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSIN, VT, Action);
setOperationAction(ISD::FCOS, VT, Action);
setOperationAction(ISD::FSINCOS, VT, Action);
+ setOperationAction(ISD::FTAN, VT, Action);
setOperationAction(ISD::FSQRT, VT, Action);
setOperationAction(ISD::FPOW, VT, Action);
setOperationAction(ISD::FLOG, VT, Action);
@@ -833,9 +834,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Always expand sin/cos functions even though x87 has an instruction.
+ // clang-format off
setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
+ setOperationAction(ISD::FTAN , MVT::f80, Expand);
+ // clang-format on
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
@@ -888,11 +892,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+ // clang-format off
setOperationAction(ISD::FSIN, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
setOperationAction(ISD::FCOS, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
+ setOperationAction(ISD::FTAN, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall);
+ // clang-format on
// No STRICT_FSINCOS
setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
@@ -944,9 +952,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
MVT::v4f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
+ // clang-format off
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FTAN, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
@@ -956,6 +966,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FEXP10, VT, Expand);
+ // clang-format on
}
// First set operation action for all vector types to either promote
@@ -2473,7 +2484,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// function casting to f64 and calling `fmod`.
if (Subtarget.is32Bit() &&
(Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
- for (ISD::NodeType Op :
+ // clang-format off
+ for (ISD::NodeType Op :
{ISD::FCEIL, ISD::STRICT_FCEIL,
ISD::FCOS, ISD::STRICT_FCOS,
ISD::FEXP, ISD::STRICT_FEXP,
@@ -2482,9 +2494,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::FLOG, ISD::STRICT_FLOG,
ISD::FLOG10, ISD::STRICT_FLOG10,
ISD::FPOW, ISD::STRICT_FPOW,
- ISD::FSIN, ISD::STRICT_FSIN})
+ ISD::FSIN, ISD::STRICT_FSIN,
+ ISD::FTAN, ISD::STRICT_FTAN})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
+ // clang-format on
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
@@ -26776,7 +26790,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
case Intrinsic::swift_async_context_addr: {
SDLoc dl(Op);
auto &MF = DAG.getMachineFunction();
- auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) {
MF.getFrameInfo().setFrameAddressIsTaken(true);
X86FI->setHasSwiftAsyncContext(true);
@@ -36781,7 +36795,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
case TargetOpcode::PREALLOCATED_SETUP: {
assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
- auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+ auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setHasPreallocatedCall(true);
int64_t PreallocatedId = MI.getOperand(0).getImm();
size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
@@ -36798,7 +36812,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
int64_t PreallocatedId = MI.getOperand(1).getImm();
int64_t ArgIdx = MI.getOperand(2).getImm();
- auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+ auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
<< ", arg offset " << ArgOffset << "\n");
@@ -36841,6 +36855,13 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned Imm = MI.getOperand(0).getImm();
BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
MI.eraseFromParent(); // The pseudo is gone now.
+ auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
+ return BB;
+ }
+ case X86::PTILEZEROV: {
+ auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
return BB;
}
case X86::PTILELOADD:
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index c47bee0..99deacc 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -74,7 +74,7 @@ let SchedRW = [WriteSystem] in {
GR16:$src2, opaquemem:$src3,
TILE:$src4), []>;
let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
- canFoldAsLoad = 1 in
+ canFoldAsLoad = 1, usesCustomInserter = 1 in
def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
[(set TILE:$dst, (int_x86_tilezero_internal
GR16:$src1, GR16:$src2))]>;
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index b690587..079ac98 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -92,6 +92,14 @@ static bool isAMXIntrinsic(Value *I) {
return false;
}
+static bool containsAMXCode(Function &F) {
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (I.getType()->isX86_AMXTy())
+ return true;
+ return false;
+}
+
static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder, BasicBlock *BB,
Type *Ty) {
Function &F = *BB->getParent();
@@ -1230,6 +1238,14 @@ public:
}
bool runOnFunction(Function &F) override {
+ // Performance optimization: most code doesn't use AMX, so return early if
+ // there are no instructions that produce AMX values. This is sufficient, as
+ // AMX arguments and constants are not allowed -- so any producer of an AMX
+ // value must be an instruction.
+ // TODO: find a cheaper way for this, without looking at all instructions.
+ if (!containsAMXCode(F))
+ return false;
+
bool C = false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
TargetLibraryInfo *TLI =
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index f27676a..613722b 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -19,6 +19,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -71,6 +72,10 @@ FunctionPass *llvm::createX86LowerTileCopyPass() {
}
bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ if (FuncInfo->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
+ return false;
+
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
const X86InstrInfo *TII = ST.getInstrInfo();
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
@@ -81,26 +86,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
- // There won't be a tile copy if neither tile register live in nor live out.
- bool HasTileCopy = false;
- for (const auto &LI : MBB.liveins()) {
- if (TILERegs.test(LI.PhysReg)) {
- HasTileCopy = true;
- break;
- }
- }
LiveRegUnits UsedRegs(*TRI);
UsedRegs.addLiveOuts(MBB);
- if (!HasTileCopy) {
- for (auto RegT : TILERegs.set_bits()) {
- if (UsedRegs.available(RegT)) {
- HasTileCopy = true;
- break;
- }
- }
- }
- if (!HasTileCopy)
- continue;
for (MachineInstr &MI : llvm::make_early_inc_range(reverse(MBB))) {
UsedRegs.stepBackward(MI);
if (!MI.isCopy())
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index f6e8532..8aaa499 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -21,6 +21,8 @@
namespace llvm {
+enum AMXProgModelEnum { None = 0, DirectReg = 1, ManagedRA = 2 };
+
/// X86MachineFunctionInfo - This class is derived from MachineFunction and
/// contains private X86 target-specific information for each MachineFunction.
class X86MachineFunctionInfo : public MachineFunctionInfo {
@@ -96,6 +98,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// used to address arguments in a function using a base pointer.
int SEHFramePtrSaveIndex = 0;
+ /// The AMX programing model used in the function.
+ AMXProgModelEnum AMXProgModel = AMXProgModelEnum::None;
+
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
bool IsSplitCSR = false;
@@ -219,6 +224,13 @@ public:
int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; }
void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; }
+ AMXProgModelEnum getAMXProgModel() const { return AMXProgModel; }
+ void setAMXProgModel(AMXProgModelEnum Model) {
+ assert((AMXProgModel == AMXProgModelEnum::None || AMXProgModel == Model) &&
+ "mixed model is not supported");
+ AMXProgModel = Model;
+ }
+
SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
return ForwardedMustTailRegParms;
}
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 2d29677..186d4d8 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -620,11 +620,11 @@ def : WriteRes<WriteNop, []>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort015], 6, [2,1], 3, 6>;
-defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort015], 6, [2,1], 3, 7>;
+defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort01], 6, [2,1], 3, 6>;
+defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort01], 6, [2,1], 3, 7>;
defm : ICXWriteResPair<WritePHAdd, [ICXPort5,ICXPort05], 3, [2,1], 3, 5>;
-defm : ICXWriteResPair<WritePHAddX, [ICXPort5,ICXPort015], 3, [2,1], 3, 6>;
-defm : ICXWriteResPair<WritePHAddY, [ICXPort5,ICXPort015], 3, [2,1], 3, 7>;
+defm : ICXWriteResPair<WritePHAddX, [ICXPort15,ICXPort015], 3, [2,1], 3, 6>;
+defm : ICXWriteResPair<WritePHAddY, [ICXPort15,ICXPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@@ -886,7 +886,7 @@ def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> {
}
def: InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
-def ICXWriteResGroup38 : SchedWriteRes<[ICXPort5,ICXPort01]> {
+def ICXWriteResGroup38 : SchedWriteRes<[ICXPort15,ICXPort01]> {
let Latency = 3;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
@@ -1739,13 +1739,13 @@ def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort01]> {
def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
"(V?)CVTPS2PDrm")>;
-def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
+def ICXWriteResGroup143 : SchedWriteRes<[ICXPort15,ICXPort01,ICXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
}
-def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm",
- "(V?)PHSUBSWrm")>;
+def: InstRW<[ICXWriteResGroup143], (instrs PHADDSWrm, VPHADDSWrm,
+ PHSUBSWrm, VPHSUBSWrm)>;
def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
let Latency = 9;
@@ -1842,7 +1842,7 @@ def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
"VPEXPANDDZ128rm(b?)",
"VPEXPANDQZ128rm(b?)")>;
-def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
+def ICXWriteResGroup154 : SchedWriteRes<[ICXPort15,ICXPort01,ICXPort23]> {
let Latency = 10;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index a7dff0e..4fded44 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -615,8 +615,8 @@ def : WriteRes<WriteNop, []>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
-defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort015], 6, [2,1], 3, 7>;
+defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort01], 6, [2,1], 3, 6>;
+defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort01], 6, [2,1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort05], 3, [2,1], 3, 5>;
defm : SKXWriteResPair<WritePHAddX, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 68155ac..b3b8486 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -302,6 +302,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
.Case("0xc00", "falkor")
.Case("0xc01", "saphira")
+ .Case("0x001", "oryon-1")
.Default("generic");
if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
// The Exynos chips have a convoluted ID scheme that doesn't seem to follow
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 7464237..60a784e 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -124,6 +124,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1150"}, {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1152"}, {"gfx1152"}, GK_GFX1152, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
@@ -275,6 +276,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1103: return {11, 0, 3};
case GK_GFX1150: return {11, 5, 0};
case GK_GFX1151: return {11, 5, 1};
+ case GK_GFX1152: return {11, 5, 2};
case GK_GFX1200: return {12, 0, 0};
case GK_GFX1201: return {12, 0, 1};
@@ -341,6 +343,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["image-insts"] = true;
Features["fp8-conversion-insts"] = true;
break;
+ case GK_GFX1152:
case GK_GFX1151:
case GK_GFX1150:
case GK_GFX1103:
@@ -542,6 +545,7 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
switch (parseArchAMDGCN(GPU)) {
case GK_GFX1201:
case GK_GFX1200:
+ case GK_GFX1152:
case GK_GFX1151:
case GK_GFX1150:
case GK_GFX1103:
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 9a5732dc..549d036 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -419,7 +419,8 @@ struct AAReturnedFromReturnedValues : public BaseType {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
StateType S(StateType::getBestState(this->getState()));
- clampReturnedValueStates<AAType, StateType, IRAttributeKind, RecurseForSelectAndPHI>(
+ clampReturnedValueStates<AAType, StateType, IRAttributeKind,
+ RecurseForSelectAndPHI>(
A, *this, S,
PropagateCallBaseContext ? this->getCallBaseContext() : nullptr);
// TODO: If we know we visited all returned values, thus no are assumed
@@ -6973,10 +6974,9 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared) {
Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) {
- LLVM_DEBUG(
- dbgs()
- << "[H2S] unique free call might not be executed with the allocation "
- << *UniqueFree << "\n");
+ LLVM_DEBUG(dbgs() << "[H2S] unique free call might not be executed "
+ "with the allocation "
+ << *UniqueFree << "\n");
return false;
}
}
@@ -10406,11 +10406,12 @@ struct AANoFPClassFloating : public AANoFPClassImpl {
struct AANoFPClassReturned final
: AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl,
- AANoFPClassImpl::StateType, false, Attribute::None, false> {
+ AANoFPClassImpl::StateType, false,
+ Attribute::None, false> {
AANoFPClassReturned(const IRPosition &IRP, Attributor &A)
: AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl,
- AANoFPClassImpl::StateType, false, Attribute::None, false>(
- IRP, A) {}
+ AANoFPClassImpl::StateType, false,
+ Attribute::None, false>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 5fbdbc3..92a9697 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
DeadArgumentElimination.cpp
ElimAvailExtern.cpp
EmbedBitcodePass.cpp
+ ExpandVariadics.cpp
ExtractGV.cpp
ForceFunctionAttrs.cpp
FunctionAttrs.cpp
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
new file mode 100644
index 0000000..d340bc0
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -0,0 +1,1012 @@
+//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimization pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The strategy is to turn the ... part of a variadic function into a va_list
+// and fix up the call sites. The majority of the pass is target independent.
+// The exceptions are the va_list type itself and the rules for where to store
+// variables in memory such that va_arg can iterate over them given a va_list.
+//
+// The majority of the plumbing is splitting the variadic function into a
+// single basic block that packs the variadic arguments into a va_list and
+// a second function that does the work of the original. That packing is
+// exactly what is done by va_start. Further, the transform from ... to va_list
+// replaces va_start with an operation to copy a va_list from the new argument,
+// which is exactly a va_copy. This is useful for reducing target-dependence.
+//
+// A va_list instance is a forward iterator, where the primary operation va_arg
+// is dereference-then-increment. This interface forces significant convergent
+// evolution between target specific implementations. The variation in runtime
+// data layout is limited to that representable by the iterator, parameterised
+// by the type passed to the va_arg instruction.
+//
+// Therefore the majority of the target specific subtlety is packing arguments
+// into a stack allocated buffer such that a va_list can be initialised with it
+// and the va_arg expansion for the target will find the arguments at runtime.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular target.
+//
+// There is one "clever" invariant in use. va_start intrinsics that are not
+// within variadic functions are an error in the IR verifier. When this
+// transform moves blocks from a variadic function into a fixed arity one, it
+// moves va_start intrinsics along with everything else. That means that the
+// va_start intrinsics that need to be rewritten to use the trailing argument
+// are exactly those that are in non-variadic functions so no further state
+// is needed to distinguish those that need to be rewritten.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+namespace {
+
+cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
+ DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
+ cl::init(ExpandVariadicsMode::Unspecified),
+ cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
+ "Use the implementation defaults"),
+ clEnumValN(ExpandVariadicsMode::Disable, "disable",
+ "Disable the pass entirely"),
+ clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
+ "Optimise without changing ABI"),
+ clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
+ "Change variadic calling convention")));
+
+bool commandLineOverride() {
+ return ExpandVariadicsModeOption != ExpandVariadicsMode::Unspecified;
+}
+
+// Instances of this class encapsulate the target-dependent behaviour as a
+// function of triple. Implementing a new ABI is adding a case to the switch
+// in create(llvm::Triple) at the end of this file.
+// This class may end up instantiated in TargetMachine instances, keeping it
+// here for now until enough targets are implemented for the API to evolve.
+class VariadicABIInfo {
+protected:
+ VariadicABIInfo() = default;
+
+public:
+ static std::unique_ptr<VariadicABIInfo> create(const Triple &T);
+
+ // Allow overriding whether the pass runs on a per-target basis
+ virtual bool enableForTarget() = 0;
+
+ // Whether a valist instance is passed by value or by address
+ // I.e. does it need to be alloca'ed and stored into, or can
+ // it be passed directly in a SSA register
+ virtual bool vaListPassedInSSARegister() = 0;
+
+ // The type of a va_list iterator object
+ virtual Type *vaListType(LLVMContext &Ctx) = 0;
+
+ // The type of a va_list as a function argument as lowered by C
+ virtual Type *vaListParameterType(Module &M) = 0;
+
+ // Initialize an allocated va_list object to point to an already
+ // initialized contiguous memory region.
+ // Return the value to pass as the va_list argument
+ virtual Value *initializeVaList(Module &M, LLVMContext &Ctx,
+ IRBuilder<> &Builder, AllocaInst *VaList,
+ Value *Buffer) = 0;
+
+ struct VAArgSlotInfo {
+ Align DataAlign; // With respect to the call frame
+ bool Indirect; // Passed via a pointer
+ };
+ virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;
+
+ // Targets implemented so far all have the same trivial lowering for these
+ bool vaEndIsNop() { return true; }
+ bool vaCopyIsMemcpy() { return true; }
+
+ virtual ~VariadicABIInfo() = default;
+};
+
+// Module implements getFunction() which returns nullptr on missing declaration
+// and getOrInsertFunction which creates one when absent. Intrinsics.h only
+// implements getDeclaration which creates one when missing. Checking whether
+// an intrinsic exists thus inserts it in the module and it then needs to be
+// deleted again to clean up.
+// The right name for the two functions on intrinsics would match Module::,
+// but doing that in a single change would introduce nullptr dereferences
+// where currently there are none. The minimal collateral damage approach
+// would split the change over a release to help downstream branches. As it
+// is unclear what approach will be preferred, implementing the trivial
+// function here in the meantime to decouple from that discussion.
+Function *getPreexistingDeclaration(Module *M, Intrinsic::ID Id,
+ ArrayRef<Type *> Tys = {}) {
+ auto *FT = Intrinsic::getType(M->getContext(), Id, Tys);
+ return M->getFunction(Tys.empty() ? Intrinsic::getName(Id)
+ : Intrinsic::getName(Id, Tys, M, FT));
+}
+
+class ExpandVariadics : public ModulePass {
+
+ // The pass construction sets the default to optimize when called from middle
+ // end and lowering when called from the backend. The command line variable
+ // overrides that. This is useful for testing and debugging. It also allows
+ // building an application with variadic functions wholly removed if one
+ // has sufficient control over the dependencies, e.g. a statically linked
+ // clang that has no variadic function calls remaining in the binary.
+
+public:
+ static char ID;
+ const ExpandVariadicsMode Mode;
+ std::unique_ptr<VariadicABIInfo> ABI;
+
+ ExpandVariadics(ExpandVariadicsMode Mode)
+ : ModulePass(ID),
+ Mode(commandLineOverride() ? ExpandVariadicsModeOption : Mode) {}
+
+ StringRef getPassName() const override { return "Expand variadic functions"; }
+
+ bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
+
+ bool runOnModule(Module &M) override;
+
+ bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
+
+ Function *replaceAllUsesWithNewDeclaration(Module &M,
+ Function *OriginalFunction);
+
+ Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction);
+
+ Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+ Function *VariadicWrapper,
+ Function *FixedArityReplacement);
+
+ bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
+ Function *NF);
+
+ // The intrinsic functions va_copy and va_end are removed unconditionally.
+ // They correspond to a memcpy and a no-op on all implemented targets.
+ // The va_start intrinsic is removed from basic blocks that were not created
+ // by this pass, some may remain if needed to maintain the external ABI.
+
+ template <Intrinsic::ID ID, typename InstructionType>
+ bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
+ PointerType *IntrinsicArgType) {
+ bool Changed = false;
+ const DataLayout &DL = M.getDataLayout();
+ if (Function *Intrinsic =
+ getPreexistingDeclaration(&M, ID, {IntrinsicArgType})) {
+ for (User *U : make_early_inc_range(Intrinsic->users()))
+ if (auto *I = dyn_cast<InstructionType>(U))
+ Changed |= expandVAIntrinsicCall(Builder, DL, I);
+
+ if (Intrinsic->use_empty())
+ Intrinsic->eraseFromParent();
+ }
+ return Changed;
+ }
+
+ bool expandVAIntrinsicUsersWithAddrspace(Module &M, IRBuilder<> &Builder,
+ unsigned Addrspace) {
+ auto &Ctx = M.getContext();
+ PointerType *IntrinsicArgType = PointerType::get(Ctx, Addrspace);
+ bool Changed = false;
+
+ // expand vastart before vacopy as vastart may introduce a vacopy
+ Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(
+ M, Builder, IntrinsicArgType);
+ Changed |= expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(
+ M, Builder, IntrinsicArgType);
+ Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(
+ M, Builder, IntrinsicArgType);
+ return Changed;
+ }
+
+ bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+ VAStartInst *Inst);
+
+ bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+ VAEndInst *Inst);
+
+ bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+ VACopyInst *Inst);
+
+ FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
+ // The type of "FTy" with the ... removed and a va_list appended
+ SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+ ArgTypes.push_back(ABI->vaListParameterType(M));
+ return FunctionType::get(FTy->getReturnType(), ArgTypes,
+ /*IsVarArgs=*/false);
+ }
+
+ static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
+ AllocaInst *Alloced) {
+ std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
+ uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
+ return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
+ }
+
+ bool expansionApplicableToFunction(Module &M, Function *F) {
+ if (F->isIntrinsic() || !F->isVarArg() ||
+ F->hasFnAttribute(Attribute::Naked))
+ return false;
+
+ if (F->getCallingConv() != CallingConv::C)
+ return false;
+
+ if (rewriteABI())
+ return true;
+
+ if (!F->hasExactDefinition())
+ return false;
+
+ return true;
+ }
+
+ bool expansionApplicableToFunctionCall(CallBase *CB) {
+ if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+ if (CI->isMustTailCall()) {
+ // Cannot expand musttail calls
+ return false;
+ }
+
+ if (CI->getCallingConv() != CallingConv::C)
+ return false;
+
+ return true;
+ }
+
+ if (isa<InvokeInst>(CB)) {
+ // Invoke not implemented in initial implementation of pass
+ return false;
+ }
+
+ // Other unimplemented derivative of CallBase
+ return false;
+ }
+
+ class ExpandedCallFrame {
+ // Helper for constructing an alloca instance containing the arguments bound
+ // to the variadic ... parameter, rearranged to allow indexing through a
+ // va_list iterator
+ enum { N = 4 };
+ SmallVector<Type *, N> FieldTypes;
+ enum Tag { Store, Memcpy, Padding };
+ SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;
+
+ template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
+ FieldTypes.push_back(FieldType);
+ Source.push_back({V, Bytes, tag});
+ }
+
+ public:
+ void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }
+
+ void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
+ append<Memcpy>(T, V, Bytes);
+ }
+
+ void padding(LLVMContext &Ctx, uint64_t By) {
+ append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
+ }
+
+ size_t size() const { return FieldTypes.size(); }
+ bool empty() const { return FieldTypes.empty(); }
+
+ StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
+ const bool IsPacked = true;
+ return StructType::create(Ctx, FieldTypes,
+ (Twine(Name) + ".vararg").str(), IsPacked);
+ }
+
+ void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
+ AllocaInst *Alloced) {
+
+ StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());
+
+ for (size_t I = 0; I < size(); I++) {
+
+ auto [V, bytes, tag] = Source[I];
+
+ if (tag == Padding) {
+ assert(V == nullptr);
+ continue;
+ }
+
+ auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);
+
+ assert(V != nullptr);
+
+ if (tag == Store)
+ Builder.CreateStore(V, Dst);
+
+ if (tag == Memcpy)
+ Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
+ }
+ }
+ };
+};
+
+bool ExpandVariadics::runOnModule(Module &M) {
+ bool Changed = false;
+ if (Mode == ExpandVariadicsMode::Disable)
+ return Changed;
+
+ Triple TT(M.getTargetTriple());
+ ABI = VariadicABIInfo::create(TT);
+ if (!ABI)
+ return Changed;
+
+ if (!ABI->enableForTarget())
+ return Changed;
+
+ auto &Ctx = M.getContext();
+ const DataLayout &DL = M.getDataLayout();
+ IRBuilder<> Builder(Ctx);
+
+ // Lowering needs to run on all functions exactly once.
+ // Optimize could run on functions containing va_start exactly once.
+ for (Function &F : make_early_inc_range(M))
+ Changed |= runOnFunction(M, Builder, &F);
+
+ // After runOnFunction, all known calls to known variadic functions have been
+ // replaced. va_start intrinsics are presently (and invalidly!) only present
+ // in functions that used to be variadic and have now been replaced to take a
+ // va_list instead. If lowering as opposed to optimising, calls to unknown
+ // variadic functions have also been replaced.
+
+ {
+ // 0 and AllocaAddrSpace are sufficient for the targets implemented so far
+ unsigned Addrspace = 0;
+ Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
+
+ Addrspace = DL.getAllocaAddrSpace();
+ if (Addrspace != 0)
+ Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
+ }
+
+ if (Mode != ExpandVariadicsMode::Lowering)
+ return Changed;
+
+ for (Function &F : make_early_inc_range(M)) {
+ if (F.isDeclaration())
+ continue;
+
+ // Now need to track down indirect calls. Can't find those
+ // by walking uses of variadic functions, need to crawl the instruction
+ // stream. Fortunately this is only necessary for the ABI rewrite case.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : make_early_inc_range(BB)) {
+ if (CallBase *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->isIndirectCall()) {
+ FunctionType *FTy = CB->getFunctionType();
+ if (FTy->isVarArg())
+ Changed |= expandCall(M, Builder, CB, FTy, 0);
+ }
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction) {
+ bool Changed = false;
+
+ if (!expansionApplicableToFunction(M, OriginalFunction))
+ return Changed;
+
+ [[maybe_unused]] const bool OriginalFunctionIsDeclaration =
+ OriginalFunction->isDeclaration();
+ assert(rewriteABI() || !OriginalFunctionIsDeclaration);
+
+ // Declare a new function and redirect every use to that new function
+ Function *VariadicWrapper =
+ replaceAllUsesWithNewDeclaration(M, OriginalFunction);
+ assert(VariadicWrapper->isDeclaration());
+ assert(OriginalFunction->use_empty());
+
+ // Create a new function taking va_list containing the implementation of the
+ // original
+ Function *FixedArityReplacement =
+ deriveFixedArityReplacement(M, Builder, OriginalFunction);
+ assert(OriginalFunction->isDeclaration());
+ assert(FixedArityReplacement->isDeclaration() ==
+ OriginalFunctionIsDeclaration);
+ assert(VariadicWrapper->isDeclaration());
+
+ // Create a single block forwarding wrapper that turns a ... into a va_list
+ [[maybe_unused]] Function *VariadicWrapperDefine =
+ defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
+ assert(VariadicWrapperDefine == VariadicWrapper);
+ assert(!VariadicWrapper->isDeclaration());
+
+ // We now have:
+ // 1. the original function, now as a declaration with no uses
+ // 2. a variadic function that unconditionally calls a fixed arity replacement
+ // 3. a fixed arity function equivalent to the original function
+
+ // Replace known calls to the variadic with calls to the va_list equivalent
+ for (User *U : make_early_inc_range(VariadicWrapper->users())) {
+ if (CallBase *CB = dyn_cast<CallBase>(U)) {
+ Value *calledOperand = CB->getCalledOperand();
+ if (VariadicWrapper == calledOperand)
+ Changed |=
+ expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
+ FixedArityReplacement);
+ }
+ }
+
+ // The original function will be erased.
+ // One of the two new functions will become a replacement for the original.
+ // When preserving the ABI, the other is an internal implementation detail.
+ // When rewriting the ABI, RAUW then the variadic one.
+ Function *const ExternallyAccessible =
+ rewriteABI() ? FixedArityReplacement : VariadicWrapper;
+ Function *const InternalOnly =
+ rewriteABI() ? VariadicWrapper : FixedArityReplacement;
+
+ // The external function is the replacement for the original
+ ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
+ ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
+ ExternallyAccessible->setComdat(OriginalFunction->getComdat());
+ ExternallyAccessible->takeName(OriginalFunction);
+
+ // Annotate the internal one as internal
+ InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
+ InternalOnly->setLinkage(GlobalValue::InternalLinkage);
+
+ // The original is unused and obsolete
+ OriginalFunction->eraseFromParent();
+
+ InternalOnly->removeDeadConstantUsers();
+
+ if (rewriteABI()) {
+ // All known calls to the function have been removed by expandCall
+ // Resolve everything else by replaceAllUsesWith
+ VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
+ VariadicWrapper->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+Function *
+ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
+ Function *OriginalFunction) {
+ auto &Ctx = M.getContext();
+ Function &F = *OriginalFunction;
+ FunctionType *FTy = F.getFunctionType();
+ Function *NF = Function::Create(FTy, F.getLinkage(), F.getAddressSpace());
+
+ NF->setName(F.getName() + ".varargs");
+ NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+
+ AttrBuilder ParamAttrs(Ctx);
+ AttributeList Attrs = NF->getAttributes();
+ Attrs = Attrs.addParamAttributes(Ctx, FTy->getNumParams(), ParamAttrs);
+ NF->setAttributes(Attrs);
+
+ OriginalFunction->replaceAllUsesWith(NF);
+ return NF;
+}
+
+Function *
+ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction) {
+ Function &F = *OriginalFunction;
+ // The purpose here is to split the variadic function F into two functions
+ // One is a variadic function that bundles the passed argument into a va_list
+ // and passes it to the second function. The second function does whatever
+ // the original F does, except that it takes a va_list instead of the ...
+
+ assert(expansionApplicableToFunction(M, &F));
+
+ auto &Ctx = M.getContext();
+
+ // Returned value isDeclaration() is equal to F.isDeclaration()
+ // but that property is not invariant throughout this function
+ const bool FunctionIsDefinition = !F.isDeclaration();
+
+ FunctionType *FTy = F.getFunctionType();
+ SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+ ArgTypes.push_back(ABI->vaListParameterType(M));
+
+ FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
+ Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
+
+ // Note - same attribute handling as DeadArgumentElimination
+ NF->copyAttributesFrom(&F);
+ NF->setComdat(F.getComdat());
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+ NF->setName(F.getName() + ".valist");
+ NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+ AttrBuilder ParamAttrs(Ctx);
+
+ AttributeList Attrs = NF->getAttributes();
+ Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
+ NF->setAttributes(Attrs);
+
+ // Splice the implementation into the new function with minimal changes
+ if (FunctionIsDefinition) {
+ NF->splice(NF->begin(), &F);
+
+ auto NewArg = NF->arg_begin();
+ for (Argument &Arg : F.args()) {
+ Arg.replaceAllUsesWith(NewArg);
+ NewArg->setName(Arg.getName()); // takeName without killing the old one
+ ++NewArg;
+ }
+ NewArg->setName("varargs");
+ }
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F.getAllMetadata(MDs);
+ for (auto [KindID, Node] : MDs)
+ NF->addMetadata(KindID, *Node);
+ F.clearMetadata();
+
+ return NF;
+}
+
+// Emit the body of the thin variadic wrapper: allocate a va_list on the
+// stack, va_start it, forward all fixed arguments plus the va_list to the
+// fixed-arity replacement, va_end, and return the call's result.
+Function *
+ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+ Function *VariadicWrapper,
+ Function *FixedArityReplacement) {
+ auto &Ctx = Builder.getContext();
+ const DataLayout &DL = M.getDataLayout();
+ assert(VariadicWrapper->isDeclaration());
+ Function &F = *VariadicWrapper;
+
+ assert(F.isDeclaration());
+ Type *VaListTy = ABI->vaListType(Ctx);
+
+ auto *BB = BasicBlock::Create(Ctx, "entry", &F);
+ Builder.SetInsertPoint(BB);
+
+ AllocaInst *VaListInstance =
+ Builder.CreateAlloca(VaListTy, nullptr, "va_start");
+
+ Builder.CreateLifetimeStart(VaListInstance,
+ sizeOfAlloca(Ctx, DL, VaListInstance));
+
+ Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
+ {VaListInstance});
+
+ // Forward the wrapper's fixed arguments unchanged.
+ SmallVector<Value *> Args;
+ for (Argument &A : F.args())
+ Args.push_back(&A);
+
+ // The trailing argument is the va_list itself (loaded) or a pointer to
+ // it, depending on how the target ABI passes va_list parameters.
+ Type *ParameterType = ABI->vaListParameterType(M);
+ if (ABI->vaListPassedInSSARegister())
+ Args.push_back(Builder.CreateLoad(ParameterType, VaListInstance));
+ else
+ Args.push_back(Builder.CreateAddrSpaceCast(VaListInstance, ParameterType));
+
+ CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);
+
+ Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
+ {VaListInstance});
+ Builder.CreateLifetimeEnd(VaListInstance,
+ sizeOfAlloca(Ctx, DL, VaListInstance));
+
+ if (Result->getType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(Result);
+
+ return VariadicWrapper;
+}
+
+// Rewrite one variadic call site CB: pack the variadic arguments into an
+// on-stack frame laid out per the target ABI, initialize a va_list over
+// that frame, and call NF (or the original callee when NF is null) with
+// the fixed arguments plus the va_list. Returns true if CB was changed.
+bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
+ FunctionType *VarargFunctionType,
+ Function *NF) {
+ bool Changed = false;
+ const DataLayout &DL = M.getDataLayout();
+
+ if (!expansionApplicableToFunctionCall(CB)) {
+ if (rewriteABI())
+ report_fatal_error("Cannot lower callbase instruction");
+ return Changed;
+ }
+
+ // This is tricky. The call instruction's function type might not match
+ // the type of the caller. When optimising, can leave it unchanged.
+ // Webassembly detects that inconsistency and repairs it.
+ FunctionType *FuncType = CB->getFunctionType();
+ if (FuncType != VarargFunctionType) {
+ if (!rewriteABI())
+ return Changed;
+ FuncType = VarargFunctionType;
+ }
+
+ auto &Ctx = CB->getContext();
+
+ Align MaxFieldAlign(1);
+
+ // The strategy is to allocate a call frame containing the variadic
+ // arguments laid out such that a target specific va_list can be initialized
+ // with it, such that target specific va_arg instructions will correctly
+ // iterate over it. This means getting the alignment right and sometimes
+ // embedding a pointer to the value instead of embedding the value itself.
+
+ Function *CBF = CB->getParent()->getParent();
+
+ ExpandedCallFrame Frame;
+
+ uint64_t CurrentOffset = 0;
+
+ // Walk only the variadic arguments (those beyond the fixed parameters).
+ for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
+ Value *ArgVal = CB->getArgOperand(I);
+ const bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
+ const bool IsByRef = CB->paramHasAttr(I, Attribute::ByRef);
+
+ // The type of the value being passed, decoded from byval/byref metadata if
+ // required
+ Type *const UnderlyingType = IsByVal ? CB->getParamByValType(I)
+ : IsByRef ? CB->getParamByRefType(I)
+ : ArgVal->getType();
+ const uint64_t UnderlyingSize =
+ DL.getTypeAllocSize(UnderlyingType).getFixedValue();
+
+ // The type to be written into the call frame
+ Type *FrameFieldType = UnderlyingType;
+
+ // The value to copy from when initialising the frame alloca
+ Value *SourceValue = ArgVal;
+
+ VariadicABIInfo::VAArgSlotInfo SlotInfo = ABI->slotInfo(DL, UnderlyingType);
+
+ if (SlotInfo.Indirect) {
+ // The va_arg lowering loads through a pointer. Set up an alloca to aim
+ // that pointer at.
+ Builder.SetInsertPointPastAllocas(CBF);
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+ Value *CallerCopy =
+ Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");
+
+ Builder.SetInsertPoint(CB);
+ if (IsByVal)
+ Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
+ else
+ Builder.CreateStore(ArgVal, CallerCopy);
+
+ // Indirection now handled, pass the alloca ptr by value
+ FrameFieldType = DL.getAllocaPtrType(Ctx);
+ SourceValue = CallerCopy;
+ }
+
+ // Alignment of the value within the frame
+ // This probably needs to be controllable as a function of type
+ Align DataAlign = SlotInfo.DataAlign;
+
+ MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
+
+ uint64_t DataAlignV = DataAlign.value();
+ if (uint64_t Rem = CurrentOffset % DataAlignV) {
+ // Inject explicit padding to deal with alignment requirements
+ uint64_t Padding = DataAlignV - Rem;
+ Frame.padding(Ctx, Padding);
+ CurrentOffset += Padding;
+ }
+
+ if (SlotInfo.Indirect) {
+ Frame.store(Ctx, FrameFieldType, SourceValue);
+ } else {
+ if (IsByVal)
+ Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
+ else
+ Frame.store(Ctx, FrameFieldType, SourceValue);
+ }
+
+ CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
+ }
+
+ if (Frame.empty()) {
+ // Not passing any arguments, hopefully va_arg won't try to read any
+ // Creating a single byte frame containing nothing to point the va_list
+ // instance as that is less special-casey in the compiler and probably
+ // easier to interpret in a debugger.
+ Frame.padding(Ctx, 1);
+ }
+
+ StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
+
+ // The struct instance needs to be at least MaxFieldAlign for the alignment of
+ // the fields to be correct at runtime. Use the native stack alignment instead
+ // if that's greater as that tends to give better codegen.
+ // This is an awkward way to guess whether there is a known stack alignment
+ // without hitting an assert in DL.getStackAlignment, 1024 is an arbitrary
+ // number likely to be greater than the natural stack alignment.
+ // TODO: DL.getStackAlignment could return a MaybeAlign instead of assert
+ Align AllocaAlign = MaxFieldAlign;
+ if (DL.exceedsNaturalStackAlignment(Align(1024)))
+ AllocaAlign = std::max(AllocaAlign, DL.getStackAlignment());
+
+ // Put the alloca to hold the variadic args in the entry basic block.
+ Builder.SetInsertPointPastAllocas(CBF);
+
+ // SetCurrentDebugLocation when the builder SetInsertPoint method does not
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+
+ // The awkward construction here is to set the alignment on the instance
+ AllocaInst *Alloced = Builder.Insert(
+ new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
+ "vararg_buffer");
+ Changed = true;
+ assert(Alloced->getAllocatedType() == VarargsTy);
+
+ // Initialize the fields in the struct
+ Builder.SetInsertPoint(CB);
+ Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+ Frame.initializeStructAlloca(DL, Builder, Alloced);
+
+ // Keep only the fixed arguments for the rewritten call.
+ const unsigned NumArgs = FuncType->getNumParams();
+ SmallVector<Value *> Args(CB->arg_begin(), CB->arg_begin() + NumArgs);
+
+ // Initialize a va_list pointing to that struct and pass it as the last
+ // argument
+ AllocaInst *VaList = nullptr;
+ {
+ if (!ABI->vaListPassedInSSARegister()) {
+ Type *VaListTy = ABI->vaListType(Ctx);
+ Builder.SetInsertPointPastAllocas(CBF);
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+ VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument");
+ Builder.SetInsertPoint(CB);
+ Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
+ }
+ Builder.SetInsertPoint(CB);
+ Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced));
+ }
+
+ // Attributes excluding any on the vararg arguments
+ AttributeList PAL = CB->getAttributes();
+ if (!PAL.isEmpty()) {
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
+ ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+ PAL =
+ AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
+ }
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CB->getOperandBundlesAsDefs(OpBundles);
+
+ CallBase *NewCB = nullptr;
+
+ if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+ Value *Dst = NF ? NF : CI->getCalledOperand();
+ FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);
+
+ NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI);
+
+ CallInst::TailCallKind TCK = CI->getTailCallKind();
+ assert(TCK != CallInst::TCK_MustTail);
+
+ // Can't tail call a function that is being passed a pointer to an alloca
+ if (TCK == CallInst::TCK_Tail)
+ TCK = CallInst::TCK_None;
+ CI->setTailCallKind(TCK);
+
+ } else {
+ llvm_unreachable("Unreachable when !expansionApplicableToFunctionCall()");
+ }
+
+ if (VaList)
+ Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
+
+ Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+
+ NewCB->setAttributes(PAL);
+ NewCB->takeName(CB);
+ NewCB->setCallingConv(CB->getCallingConv());
+ NewCB->setDebugLoc(DebugLoc());
+
+ // DeadArgElim and ArgPromotion copy exactly this metadata
+ NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
+
+ CB->replaceAllUsesWith(NewCB);
+ CB->eraseFromParent();
+ return Changed;
+}
+
+// Lower a va_start in a (now) fixed-arity function so it reads from the
+// va_list passed as the function's final argument. Returns true if the
+// instruction was replaced.
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+ const DataLayout &DL,
+ VAStartInst *Inst) {
+ // Only removing va_start instructions that are not in variadic functions.
+ // Those would be rejected by the IR verifier before this pass.
+ // After splicing basic blocks from a variadic function into a fixed arity
+ // one the va_start that used to refer to the ... parameter still exists.
+ // There are also variadic functions that this pass did not change and
+ // va_start instances in the created single block wrapper functions.
+ // Replace exactly the instances in non-variadic functions as those are
+ // the ones to be fixed up to use the va_list passed as the final argument.
+
+ Function *ContainingFunction = Inst->getFunction();
+ if (ContainingFunction->isVarArg()) {
+ return false;
+ }
+
+ // The last argument is a vaListParameterType, either a va_list
+ // or a pointer to one depending on the target.
+ bool PassedByValue = ABI->vaListPassedInSSARegister();
+ Argument *PassedVaList =
+ ContainingFunction->getArg(ContainingFunction->arg_size() - 1);
+
+ // va_start takes a pointer to a va_list, e.g. one on the stack
+ Value *VaStartArg = Inst->getArgList();
+
+ Builder.SetInsertPoint(Inst);
+
+ if (PassedByValue) {
+ // The general thing to do is create an alloca, store the va_list argument
+ // to it, then create a va_copy. When vaCopyIsMemcpy(), this optimises to a
+ // store to the VaStartArg.
+ assert(ABI->vaCopyIsMemcpy());
+ Builder.CreateStore(PassedVaList, VaStartArg);
+ } else {
+
+ // Otherwise emit a vacopy to pick up target-specific handling if any
+ auto &Ctx = Builder.getContext();
+
+ Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)},
+ {VaStartArg, PassedVaList});
+ }
+
+ Inst->eraseFromParent();
+ return true;
+}
+
+// va_end is a no-op on every ABI this pass supports, so simply delete it.
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+ VAEndInst *Inst) {
+ assert(ABI->vaEndIsNop());
+ Inst->eraseFromParent();
+ return true;
+}
+
+// Lower va_copy as a memcpy of the va_list storage; only valid on ABIs
+// where vaCopyIsMemcpy() holds (asserted below).
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+ const DataLayout &DL,
+ VACopyInst *Inst) {
+ assert(ABI->vaCopyIsMemcpy());
+ Builder.SetInsertPoint(Inst);
+
+ auto &Ctx = Builder.getContext();
+ Type *VaListTy = ABI->vaListType(Ctx);
+ uint64_t Size = DL.getTypeAllocSize(VaListTy).getFixedValue();
+
+ Builder.CreateMemCpy(Inst->getDest(), {}, Inst->getSrc(), {},
+ Builder.getInt32(Size));
+
+ Inst->eraseFromParent();
+ return true;
+}
+
+// AMDGPU ABI: va_list is a bare pointer passed in an SSA register; every
+// variadic slot is 4-byte aligned and passed directly (never indirect).
+struct Amdgpu final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext &Ctx) override {
+ return PointerType::getUnqual(Ctx);
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst * /*va_list*/, Value *Buffer) override {
+ // Given Buffer, which is an AllocaInst of vararg_buffer
+ // need to return something usable as parameter type
+ return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
+ }
+
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ return {Align(4), false};
+ }
+};
+
+// WebAssembly ABI (testing only): va_list is a bare pointer in an SSA
+// register; slots are at least 4-byte aligned, and multi-element structs
+// are passed indirectly through a pointer.
+struct Wasm final : public VariadicABIInfo {
+
+ bool enableForTarget() override {
+ // Currently wasm is only used for testing.
+ return commandLineOverride();
+ }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext &Ctx) override {
+ return PointerType::getUnqual(Ctx);
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst * /*va_list*/, Value *Buffer) override {
+ // The frame pointer itself serves as the va_list value.
+ return Buffer;
+ }
+
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ LLVMContext &Ctx = Parameter->getContext();
+ const unsigned MinAlign = 4;
+ Align A = DL.getABITypeAlign(Parameter);
+ if (A < MinAlign)
+ A = Align(MinAlign);
+
+ // Aggregates with more than one element are passed indirectly.
+ if (auto s = dyn_cast<StructType>(Parameter)) {
+ if (s->getNumElements() > 1) {
+ return {DL.getABITypeAlign(PointerType::getUnqual(Ctx)), true};
+ }
+ }
+
+ return {A, false};
+ }
+};
+
+// Factory: select the VariadicABIInfo implementation for the target triple.
+// Returns null for targets with no implementation, which disables the pass.
+std::unique_ptr<VariadicABIInfo> VariadicABIInfo::create(const Triple &T) {
+ switch (T.getArch()) {
+ case Triple::r600:
+ case Triple::amdgcn: {
+ return std::make_unique<Amdgpu>();
+ }
+
+ case Triple::wasm32: {
+ return std::make_unique<Wasm>();
+ }
+
+ default:
+ return {};
+ }
+}
+
+} // namespace
+
+// Legacy pass machinery: pass ID, registration, and factory.
+char ExpandVariadics::ID = 0;
+
+INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
+ false)
+
+ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
+ return new ExpandVariadics(M);
+}
+
+// New pass manager entry point; preserves nothing when the module changed.
+PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
+ return ExpandVariadics(Mode).runOnModule(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
+ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {}
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 03923b8..f033d2b 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -262,8 +262,70 @@ public:
// TODO: Should this be a map (from Caller node) for more efficient lookup?
std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
- // The set of IDs for contexts including this node.
- DenseSet<uint32_t> ContextIds;
+ // Get the list of edges from which we can compute allocation information
+ // such as the context ids and allocation type of this node.
+ const std::vector<std::shared_ptr<ContextEdge>> *
+ getEdgesWithAllocInfo() const {
+ // If node has any callees, compute from those, otherwise compute from
+ // callers (i.e. if this is the leaf allocation node).
+ if (!CalleeEdges.empty())
+ return &CalleeEdges;
+ if (!CallerEdges.empty()) {
+ // A node with caller edges but no callee edges must be the allocation
+ // node.
+ assert(IsAllocation);
+ return &CallerEdges;
+ }
+ return nullptr;
+ }
+
+ // Compute the context ids for this node from the union of its edge context
+ // ids.
+ DenseSet<uint32_t> getContextIds() const {
+ DenseSet<uint32_t> ContextIds;
+ auto *Edges = getEdgesWithAllocInfo();
+ if (!Edges)
+ return {};
+ unsigned Count = 0;
+ for (auto &Edge : *Edges)
+ Count += Edge->getContextIds().size();
+ ContextIds.reserve(Count);
+ for (auto &Edge : *Edges)
+ ContextIds.insert(Edge->getContextIds().begin(),
+ Edge->getContextIds().end());
+ return ContextIds;
+ }
+
+ // Compute the allocation type for this node from the OR of its edge
+ // allocation types.
+ uint8_t computeAllocType() const {
+ auto *Edges = getEdgesWithAllocInfo();
+ if (!Edges)
+ return (uint8_t)AllocationType::None;
+ uint8_t BothTypes =
+ (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
+ uint8_t AllocType = (uint8_t)AllocationType::None;
+ for (auto &Edge : *Edges) {
+ AllocType |= Edge->AllocTypes;
+ // Bail early if alloc type reached both, no further refinement.
+ if (AllocType == BothTypes)
+ return AllocType;
+ }
+ return AllocType;
+ }
+
+ // The context ids set for this node is empty if its edge context ids are
+ // also all empty.
+ bool emptyContextIds() const {
+ auto *Edges = getEdgesWithAllocInfo();
+ if (!Edges)
+ return true;
+ for (auto &Edge : *Edges) {
+ if (!Edge->getContextIds().empty())
+ return false;
+ }
+ return true;
+ }
// List of clones of this ContextNode, initially empty.
std::vector<ContextNode *> Clones;
@@ -308,15 +370,11 @@ public:
void printCall(raw_ostream &OS) const { Call.print(OS); }
// True if this node was effectively removed from the graph, in which case
- // its context id set, caller edges, and callee edges should all be empty.
+ // it should have an allocation type of None and empty context ids.
bool isRemoved() const {
- // Note that we can have non-empty context ids with empty caller and
- // callee edges if the graph ends up with a single node.
- if (ContextIds.empty())
- assert(CalleeEdges.empty() && CallerEdges.empty() &&
- "Context ids empty but at least one of callee and caller edges "
- "were not!");
- return ContextIds.empty();
+ assert((AllocTypes == (uint8_t)AllocationType::None) ==
+ emptyContextIds());
+ return AllocTypes == (uint8_t)AllocationType::None;
}
void dump() const;
@@ -429,7 +487,8 @@ private:
/// else to its callers. Also updates OrigNode's edges to remove any context
/// ids moved to the newly created edge.
void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
- bool TowardsCallee);
+ bool TowardsCallee,
+ DenseSet<uint32_t> RemainingContextIds);
/// Get the stack id corresponding to the given Id or Index (for IR this will
/// return itself, for a summary index this will return the id recorded in the
@@ -958,7 +1017,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
// Update alloc type and context ids for this MIB.
AllocNode->AllocTypes |= (uint8_t)AllocType;
- AllocNode->ContextIds.insert(LastContextId);
// Now add or update nodes for each stack id in alloc's context.
// Later when processing the stack ids on non-alloc callsites we will adjust
@@ -983,7 +1041,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
auto Ins = StackIdSet.insert(StackId);
if (!Ins.second)
StackNode->Recursive = true;
- StackNode->ContextIds.insert(LastContextId);
StackNode->AllocTypes |= (uint8_t)AllocType;
PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
PrevNode = StackNode;
@@ -1034,7 +1091,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// it resulted in any added ids to NextNode.
if (!NewIdsToAdd.empty()) {
Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
- NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
UpdateCallers(NextNode, Visited, UpdateCallers);
}
}
@@ -1043,21 +1099,16 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
DenseSet<const ContextEdge *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap) {
auto *Node = Entry.second;
- // Update ids on the allocation nodes before calling the recursive
- // update along caller edges, since this simplifies the logic during
- // that traversal.
- DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds);
- Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
UpdateCallers(Node, Visited, UpdateCallers);
}
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
- ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) {
- // Make a copy of the context ids, since this will be adjusted below as they
- // are moved.
- DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds;
+ ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
+ // This must be passed by value to make a copy since it will be adjusted
+ // as ids are moved.
+ DenseSet<uint32_t> RemainingContextIds) {
auto &OrigEdges =
TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
// Increment iterator in loop so that we can remove edges as needed.
@@ -1104,6 +1155,51 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
+static void checkEdge(
+ const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
+ // Confirm that alloc type is not None and that we have at least one context
+ // id.
+ assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
+ assert(!Edge->ContextIds.empty());
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
+ bool CheckEdges = true) {
+ if (Node->isRemoved())
+ return;
+#ifndef NDEBUG
+ // Compute node's context ids once for use in asserts.
+ auto NodeContextIds = Node->getContextIds();
+#endif
+ // Node's context ids should be the union of both its callee and caller edge
+ // context ids.
+ if (Node->CallerEdges.size()) {
+ DenseSet<uint32_t> CallerEdgeContextIds(
+ Node->CallerEdges.front()->ContextIds);
+ for (const auto &Edge : llvm::drop_begin(Node->CallerEdges)) {
+ if (CheckEdges)
+ checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
+ set_union(CallerEdgeContextIds, Edge->ContextIds);
+ }
+ // Node can have more context ids than callers if some contexts terminate at
+ // node and some are longer.
+ assert(NodeContextIds == CallerEdgeContextIds ||
+ set_is_subset(CallerEdgeContextIds, NodeContextIds));
+ }
+ if (Node->CalleeEdges.size()) {
+ DenseSet<uint32_t> CalleeEdgeContextIds(
+ Node->CalleeEdges.front()->ContextIds);
+ for (const auto &Edge : llvm::drop_begin(Node->CalleeEdges)) {
+ if (CheckEdges)
+ checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
+ set_union(CalleeEdgeContextIds, Edge->getContextIds());
+ }
+ assert(NodeContextIds == CalleeEdgeContextIds);
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
assignStackNodesPostOrder(ContextNode *Node,
DenseSet<const ContextNode *> &Visited,
@@ -1178,7 +1274,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// duplicated context ids. We have to recompute as we might have overlap
// overlap between the saved context ids for different last nodes, and
// removed them already during the post order traversal.
- set_intersect(SavedContextIds, FirstNode->ContextIds);
+ set_intersect(SavedContextIds, FirstNode->getContextIds());
ContextNode *PrevNode = nullptr;
for (auto Id : Ids) {
ContextNode *CurNode = getNodeForStackId(Id);
@@ -1211,18 +1307,17 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
ContextNode *NewNode = NodeOwner.back().get();
NodeToCallingFunc[NewNode] = Func;
NonAllocationCallToContextNodeMap[Call] = NewNode;
- NewNode->ContextIds = SavedContextIds;
- NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
+ NewNode->AllocTypes = computeAllocType(SavedContextIds);
// Connect to callees of innermost stack frame in inlined call chain.
// This updates context ids for FirstNode's callee's to reflect those
// moved to NewNode.
- connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true);
+ connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true, SavedContextIds);
// Connect to callers of outermost stack frame in inlined call chain.
// This updates context ids for FirstNode's caller's to reflect those
// moved to NewNode.
- connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false);
+ connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false, SavedContextIds);
// Now we need to remove context ids from edges/nodes between First and
// Last Node.
@@ -1234,18 +1329,32 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// Remove the context ids moved to NewNode from CurNode, and the
// edge from the prior node.
- set_subtract(CurNode->ContextIds, NewNode->ContextIds);
if (PrevNode) {
auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
assert(PrevEdge);
- set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
+ set_subtract(PrevEdge->getContextIds(), SavedContextIds);
if (PrevEdge->getContextIds().empty()) {
PrevNode->eraseCallerEdge(PrevEdge);
CurNode->eraseCalleeEdge(PrevEdge);
}
}
+ // Since we update the edges from leaf to tail, only look at the callee
+ // edges. This isn't an alloc node, so if there are no callee edges, the
+ // alloc type is None.
+ CurNode->AllocTypes = CurNode->CalleeEdges.empty()
+ ? (uint8_t)AllocationType::None
+ : CurNode->computeAllocType();
PrevNode = CurNode;
}
+ if (VerifyNodes) {
+ checkNode<DerivedCCG, FuncTy, CallTy>(NewNode, /*CheckEdges=*/true);
+ for (auto Id : Ids) {
+ ContextNode *CurNode = getNodeForStackId(Id);
+ // We should only have kept stack ids that had nodes.
+ assert(CurNode);
+ checkNode<DerivedCCG, FuncTy, CallTy>(CurNode, /*CheckEdges=*/true);
+ }
+ }
}
}
@@ -1319,7 +1428,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// Initialize the context ids with the last node's. We will subsequently
// refine the context ids by computing the intersection along all edges.
- DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds;
+ DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
assert(!LastNodeContextIds.empty());
for (unsigned I = 0; I < Calls.size(); I++) {
@@ -1442,6 +1551,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
DenseSet<const ContextNode *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap)
assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
+ if (VerifyCCG)
+ check();
}
uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
@@ -1786,8 +1897,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
// First check if we have already synthesized a node for this tail call.
if (TailCallToContextNodeMap.count(NewCall)) {
NewNode = TailCallToContextNodeMap[NewCall];
- NewNode->ContextIds.insert(Edge->ContextIds.begin(),
- Edge->ContextIds.end());
NewNode->AllocTypes |= Edge->AllocTypes;
} else {
FuncToCallsWithMetadata[Func].push_back({NewCall});
@@ -1797,7 +1906,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
NewNode = NodeOwner.back().get();
NodeToCallingFunc[NewNode] = Func;
TailCallToContextNodeMap[NewCall] = NewNode;
- NewNode->ContextIds = Edge->ContextIds;
NewNode->AllocTypes = Edge->AllocTypes;
}
@@ -2091,6 +2199,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
OS << "\n";
OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
OS << "\tContextIds:";
+ // Make a copy of the computed context ids that we can sort for stability.
+ auto ContextIds = getContextIds();
std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
std::sort(SortedIds.begin(), SortedIds.end());
for (auto Id : SortedIds)
@@ -2151,53 +2261,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
-static void checkEdge(
- const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
- // Confirm that alloc type is not None and that we have at least one context
- // id.
- assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
- assert(!Edge->ContextIds.empty());
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
- bool CheckEdges = true) {
- if (Node->isRemoved())
- return;
- // Node's context ids should be the union of both its callee and caller edge
- // context ids.
- if (Node->CallerEdges.size()) {
- auto EI = Node->CallerEdges.begin();
- auto &FirstEdge = *EI;
- EI++;
- DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
- for (; EI != Node->CallerEdges.end(); EI++) {
- const auto &Edge = *EI;
- if (CheckEdges)
- checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
- set_union(CallerEdgeContextIds, Edge->ContextIds);
- }
- // Node can have more context ids than callers if some contexts terminate at
- // node and some are longer.
- assert(Node->ContextIds == CallerEdgeContextIds ||
- set_is_subset(CallerEdgeContextIds, Node->ContextIds));
- }
- if (Node->CalleeEdges.size()) {
- auto EI = Node->CalleeEdges.begin();
- auto &FirstEdge = *EI;
- EI++;
- DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
- for (; EI != Node->CalleeEdges.end(); EI++) {
- const auto &Edge = *EI;
- if (CheckEdges)
- checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
- set_union(CalleeEdgeContextIds, Edge->ContextIds);
- }
- assert(Node->ContextIds == CalleeEdgeContextIds);
- }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
for (const auto Node : nodes<GraphType>(this)) {
@@ -2284,7 +2347,7 @@ struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
static std::string getNodeAttributes(NodeRef Node, GraphType) {
std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
- getContextIds(Node->ContextIds) + "\"")
+ getContextIds(Node->getContextIds()) + "\"")
.str();
AttributeString +=
(Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
@@ -2443,16 +2506,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
set_subtract(Edge->ContextIds, ContextIdsToMove);
Edge->AllocTypes = computeAllocType(Edge->ContextIds);
}
- // Now perform some updates that are common to all cases: the NewCallee gets
- // the moved ids added, and we need to remove those ids from OldCallee and
- // update its alloc type (NewCallee alloc type updates handled above).
- NewCallee->ContextIds.insert(ContextIdsToMove.begin(),
- ContextIdsToMove.end());
- set_subtract(OldCallee->ContextIds, ContextIdsToMove);
- OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
- // OldCallee alloc type should be None iff its context id set is now empty.
- assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
- OldCallee->ContextIds.empty());
// Now walk the old callee node's callee edges and move Edge's context ids
// over to the corresponding edge into the clone (which is created here if
// this is a newly created clone).
@@ -2484,6 +2537,12 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
NewCallee->CalleeEdges.push_back(NewEdge);
NewEdge->Callee->CallerEdges.push_back(NewEdge);
}
+ // Recompute the node alloc type now that its callee edges have been
+ // updated (since we will compute from those edges).
+ OldCallee->AllocTypes = OldCallee->computeAllocType();
+ // OldCallee alloc type should be None iff its context id set is now empty.
+ assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
+ OldCallee->emptyContextIds());
if (VerifyCCG) {
checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
@@ -2528,7 +2587,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
DenseSet<const ContextNode *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap) {
Visited.clear();
- identifyClones(Entry.second, Visited, Entry.second->ContextIds);
+ identifyClones(Entry.second, Visited, Entry.second->getContextIds());
}
Visited.clear();
for (auto &Entry : AllocationCallToContextNodeMap)
@@ -2714,7 +2773,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
}
// We should still have some context ids on the original Node.
- assert(!Node->ContextIds.empty());
+ assert(!Node->emptyContextIds());
// Sanity check that no alloc types on node or edges are None.
assert(Node->AllocTypes != (uint8_t)AllocationType::None);
@@ -2918,7 +2977,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// find additional cloning is required.
std::deque<ContextNode *> ClonesWorklist;
// Ignore original Node if we moved all of its contexts to clones.
- if (!Node->ContextIds.empty())
+ if (!Node->emptyContextIds())
ClonesWorklist.push_back(Node);
ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
Node->Clones.end());
@@ -3258,7 +3317,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// Skip if either no call to update, or if we ended up with no context ids
// (we moved all edges onto other clones).
- if (!Node->hasCall() || Node->ContextIds.empty())
+ if (!Node->hasCall() || Node->emptyContextIds())
return;
if (Node->IsAllocation) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 89193f8..38c1c26 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4745,6 +4745,29 @@ static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
Constant::getNullValue(Op1->getType()));
}
+ if (!ICmpInst::isSigned(Pred))
+ return nullptr;
+
+ KnownBits KnownY = IC.computeKnownBits(A, /*Depth=*/0, &I);
+ // (X & NegY) spred X --> (X & NegY) upred X
+ if (KnownY.isNegative())
+ return new ICmpInst(ICmpInst::getUnsignedPredicate(Pred), Op0, Op1);
+
+ if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGT)
+ return nullptr;
+
+ if (KnownY.isNonNegative())
+ // (X & PosY) s<= X --> X s>= 0
+ // (X & PosY) s> X --> X s< 0
+ return new ICmpInst(ICmpInst::getSwappedPredicate(Pred), Op1,
+ Constant::getNullValue(Op1->getType()));
+
+ if (isKnownNegative(Op1, IC.getSimplifyQuery().getWithInstruction(&I)))
+ // (NegX & Y) s<= NegX --> Y s< 0
+ // (NegX & Y) s> NegX --> Y s>= 0
+ return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), A,
+ Constant::getNullValue(A->getType()));
+
return nullptr;
}
@@ -4772,7 +4795,7 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
// icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible
if (Value *NotOp1 =
- IC.getFreelyInverted(Op1, Op1->hasOneUse(), &IC.Builder))
+ IC.getFreelyInverted(Op1, !Op1->hasNUsesOrMore(3), &IC.Builder))
return new ICmpInst(Pred, IC.Builder.CreateAnd(A, NotOp1),
Constant::getNullValue(Op1->getType()));
// icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X is freely invertible.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 313beb7..d2aaa5e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1294,8 +1294,7 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
// X == Y ? X : Z with X == Y ? Y : Z, as that would lead to an infinite
// replacement cycle.
Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
- if (TrueVal != CmpLHS &&
- isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) {
+ if (TrueVal != CmpLHS && isGuaranteedNotToBeUndef(CmpRHS, SQ.AC, &Sel, &DT)) {
if (Value *V = simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ,
/* AllowRefinement */ true))
// Require either the replacement or the simplification result to be a
@@ -1316,8 +1315,7 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
if (replaceInInstruction(TrueVal, CmpLHS, CmpRHS))
return &Sel;
}
- if (TrueVal != CmpRHS &&
- isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT))
+ if (TrueVal != CmpRHS && isGuaranteedNotToBeUndef(CmpLHS, SQ.AC, &Sel, &DT))
if (Value *V = simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ,
/* AllowRefinement */ true))
if (isa<Constant>(CmpLHS) || isa<Constant>(V))
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 2aa2175..a0e63bf1 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -337,13 +337,17 @@ private:
unsigned AccessSizeIndex,
Instruction *InsertBefore, DomTreeUpdater &DTU,
LoopInfo *LI);
- bool ignoreMemIntrinsic(MemIntrinsic *MI);
+ bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI);
void instrumentMemIntrinsic(MemIntrinsic *MI);
bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
LoopInfo *LI);
- bool ignoreAccess(Instruction *Inst, Value *Ptr);
+ bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr);
+ bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst,
+ Value *Ptr);
+
void getInterestingMemoryOperands(
- Instruction *I, const TargetLibraryInfo &TLI,
+ OptimizationRemarkEmitter &ORE, Instruction *I,
+ const TargetLibraryInfo &TLI,
SmallVectorImpl<InterestingMemoryOperand> &Interesting);
void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
@@ -765,7 +769,8 @@ Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
}
-bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
+bool HWAddressSanitizer::ignoreAccessWithoutRemark(Instruction *Inst,
+ Value *Ptr) {
// Do not instrument accesses from different address spaces; we cannot deal
// with them.
Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
@@ -795,8 +800,23 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
return false;
}
+bool HWAddressSanitizer::ignoreAccess(OptimizationRemarkEmitter &ORE,
+ Instruction *Inst, Value *Ptr) {
+ bool Ignored = ignoreAccessWithoutRemark(Inst, Ptr);
+ if (Ignored) {
+ ORE.emit(
+ [&]() { return OptimizationRemark(DEBUG_TYPE, "ignoreAccess", Inst); });
+ } else {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ignoreAccess", Inst);
+ });
+ }
+ return Ignored;
+}
+
void HWAddressSanitizer::getInterestingMemoryOperands(
- Instruction *I, const TargetLibraryInfo &TLI,
+ OptimizationRemarkEmitter &ORE, Instruction *I,
+ const TargetLibraryInfo &TLI,
SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
// Skip memory accesses inserted by another instrumentation.
if (I->hasMetadata(LLVMContext::MD_nosanitize))
@@ -807,22 +827,22 @@ void HWAddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(ORE, I, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
+ if (!ClInstrumentWrites || ignoreAccess(ORE, I, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(ORE, I, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), std::nullopt);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(ORE, I, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(),
@@ -830,7 +850,7 @@ void HWAddressSanitizer::getInterestingMemoryOperands(
} else if (auto *CI = dyn_cast<CallInst>(I)) {
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(I, CI->getArgOperand(ArgNo)))
+ ignoreAccess(ORE, I, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
@@ -1035,13 +1055,14 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
->setSuccessor(0, TCI.TagMismatchTerm->getParent());
}
-bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
+bool HWAddressSanitizer::ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE,
+ MemIntrinsic *MI) {
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
- return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
- (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
+ return (!ClInstrumentWrites || ignoreAccess(ORE, MTI, MTI->getDest())) &&
+ (!ClInstrumentReads || ignoreAccess(ORE, MTI, MTI->getSource()));
}
if (isa<MemSetInst>(MI))
- return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
+ return !ClInstrumentWrites || ignoreAccess(ORE, MI, MI->getDest());
return false;
}
@@ -1541,6 +1562,9 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
NumTotalFuncs++;
+ OptimizationRemarkEmitter &ORE =
+ FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
if (selectiveInstrumentationShouldSkip(F, FAM))
return;
@@ -1562,10 +1586,10 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
LandingPadVec.push_back(&Inst);
- getInterestingMemoryOperands(&Inst, TLI, OperandsToInstrument);
+ getInterestingMemoryOperands(ORE, &Inst, TLI, OperandsToInstrument);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
- if (!ignoreMemIntrinsic(MI))
+ if (!ignoreMemIntrinsic(ORE, MI))
IntrinToInstrument.push_back(MI);
}
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index ba2546b..4371b82 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -827,7 +827,8 @@ private:
return false;
}
- if (Metrics.convergent) {
+ // FIXME: Allow jump threading with controlled convergence.
+ if (Metrics.Convergence != ConvergenceKind::None) {
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
<< "convergent instructions.\n");
ORE->emit([&]() {
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 7b4c543..f8e2f1f 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -327,8 +327,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);
if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {
- LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
- << " which cannot be duplicated or have invalid cost.\n");
+ LLVM_DEBUG(dbgs() << " Loop not considered unrollable\n");
return LoopUnrollResult::Unmodified;
}
@@ -341,7 +340,10 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
}
- if (InnerUCE.Convergent || OuterUCE.Convergent) {
+ // FIXME: The call to canUnroll() allows some controlled convergent
+ // operations, but we block them here for future changes.
+ if (InnerUCE.Convergence != ConvergenceKind::None ||
+ OuterUCE.Convergence != ConvergenceKind::None) {
LLVM_DEBUG(
dbgs() << " Not unrolling loop with convergent instructions.\n");
return LoopUnrollResult::Unmodified;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 10fc9e9..cbc35b6 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -684,11 +684,15 @@ UnrollCostEstimator::UnrollCostEstimator(
const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
CodeMetrics Metrics;
for (BasicBlock *BB : L->blocks())
- Metrics.analyzeBasicBlock(BB, TTI, EphValues);
+ Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
+ L);
NumInlineCandidates = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
- Convergent = Metrics.convergent;
+ Convergence = Metrics.Convergence;
LoopSize = Metrics.NumInsts;
+ ConvergenceAllowsRuntime =
+ Metrics.Convergence != ConvergenceKind::Uncontrolled &&
+ !getLoopConvergenceHeart(L);
// Don't allow an estimate of size zero. This would allows unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
@@ -701,6 +705,25 @@ UnrollCostEstimator::UnrollCostEstimator(
LoopSize = BEInsns + 1;
}
+bool UnrollCostEstimator::canUnroll() const {
+ switch (Convergence) {
+ case ConvergenceKind::ExtendedLoop:
+ LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");
+ return false;
+ default:
+ break;
+ }
+ if (!LoopSize.isValid()) {
+ LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");
+ return false;
+ }
+ if (NotDuplicatable) {
+ LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");
+ return false;
+ }
+ return true;
+}
+
uint64_t UnrollCostEstimator::getUnrolledLoopSize(
const TargetTransformInfo::UnrollingPreferences &UP,
unsigned CountOverwrite) const {
@@ -1206,8 +1229,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
if (!UCE.canUnroll()) {
- LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
- << " which cannot be duplicated or have invalid cost.\n");
+ LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");
return LoopUnrollResult::Unmodified;
}
@@ -1254,15 +1276,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// is unsafe -- it adds a control-flow dependency to the convergent
// operation. Therefore restrict remainder loop (try unrolling without).
//
- // TODO: This is quite conservative. In practice, convergent_op()
- // is likely to be called unconditionally in the loop. In this
- // case, the program would be ill-formed (on most architectures)
- // unless n were the same on all threads in a thread group.
- // Assuming n is the same on all threads, any kind of unrolling is
- // safe. But currently llvm's notion of convergence isn't powerful
- // enough to express this.
- if (UCE.Convergent)
- UP.AllowRemainder = false;
+ // TODO: This is somewhat conservative; we could allow the remainder if the
+ // trip count is uniform.
+ UP.AllowRemainder &= UCE.ConvergenceAllowsRuntime;
// Try to find the trip count upper bound if we cannot find the exact trip
// count.
@@ -1282,6 +1298,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
if (!UP.Count)
return LoopUnrollResult::Unmodified;
+ UP.Runtime &= UCE.ConvergenceAllowsRuntime;
+
if (PP.PeelCount) {
assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
@@ -1324,11 +1342,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// Unroll the loop.
Loop *RemainderLoop = nullptr;
+ UnrollLoopOptions ULO;
+ ULO.Count = UP.Count;
+ ULO.Force = UP.Force;
+ ULO.AllowExpensiveTripCount = UP.AllowExpensiveTripCount;
+ ULO.UnrollRemainder = UP.UnrollRemainder;
+ ULO.Runtime = UP.Runtime;
+ ULO.ForgetAllSCEV = ForgetAllSCEV;
+ ULO.Heart = getLoopConvergenceHeart(L);
LoopUnrollResult UnrollResult = UnrollLoop(
- L,
- {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
- UP.UnrollRemainder, ForgetAllSCEV},
- LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
+ L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index eb471b2..cfe6349 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1221,7 +1221,6 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
SmallPtrSet<const Value *, 4> ObjSet;
SmallVector<Metadata *, 4> Scopes, NoAliases;
- SmallSetVector<const Argument *, 4> NAPtrArgs;
for (const Value *V : PtrArgs) {
SmallVector<const Value *, 4> Objects;
getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 08ba65d..3d950b1 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -460,7 +460,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
L->dump());
return Rotated;
}
- if (Metrics.convergent) {
+ if (Metrics.Convergence != ConvergenceKind::None) {
LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
"instructions: ";
L->dump());
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 1216538..90d7b99 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -419,6 +419,26 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
}
}
+// Loops containing convergent instructions that are uncontrolled or controlled
+// from outside the loop must have a count that divides their TripMultiple.
+LLVM_ATTRIBUTE_USED
+static bool canHaveUnrollRemainder(const Loop *L) {
+ if (getLoopConvergenceHeart(L))
+ return false;
+
+ // Check for uncontrolled convergent operations.
+ for (auto &BB : L->blocks()) {
+ for (auto &I : *BB) {
+ if (isa<ConvergenceControlInst>(I))
+ return true;
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (CB->isConvergent())
+ return CB->getConvergenceControlToken();
+ }
+ }
+ return true;
+}
+
/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
@@ -564,19 +584,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
- // Loops containing convergent instructions cannot use runtime unrolling,
- // as the prologue/epilogue may add additional control-dependencies to
- // convergent operations.
- LLVM_DEBUG(
- {
- bool HasConvergent = false;
- for (auto &BB : L->blocks())
- for (auto &I : *BB)
- if (auto *CB = dyn_cast<CallBase>(&I))
- HasConvergent |= CB->isConvergent();
- assert((!HasConvergent || !ULO.Runtime) &&
- "Can't runtime unroll if loop contains a convergent operation.");
- });
+ assert((!ULO.Runtime || canHaveUnrollRemainder(L)) &&
+ "Can't runtime unroll if loop contains a convergent operation.");
bool EpilogProfitability =
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
@@ -722,7 +731,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (OldLoop)
LoopsToSimplify.insert(NewLoops[OldLoop]);
- if (*BB == Header)
+ if (*BB == Header) {
// Loop over all of the PHI nodes in the block, changing them to use
// the incoming values from the previous block.
for (PHINode *OrigPHI : OrigPHINode) {
@@ -735,6 +744,16 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
NewPHI->eraseFromParent();
}
+ // Eliminate copies of the loop heart intrinsic, if any.
+ if (ULO.Heart) {
+ auto it = VMap.find(ULO.Heart);
+ assert(it != VMap.end());
+ Instruction *heartCopy = cast<Instruction>(it->second);
+ heartCopy->eraseFromParent();
+ VMap.erase(it);
+ }
+ }
+
// Update our running map of newest clones
LastValueMap[*BB] = New;
for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index e1af028..dd7150b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -1016,12 +1016,17 @@ bool llvm::UnrollRuntimeLoopRemainder(
auto UnrollResult = LoopUnrollResult::Unmodified;
if (remainderLoop && UnrollRemainder) {
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
- UnrollResult =
- UnrollLoop(remainderLoop,
- {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
- /*AllowExpensiveTripCount*/ false,
- /*UnrollRemainder*/ false, ForgetAllSCEV},
- LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
+ UnrollLoopOptions ULO;
+ ULO.Count = Count - 1;
+ ULO.Force = false;
+ ULO.Runtime = false;
+ ULO.AllowExpensiveTripCount = false;
+ ULO.UnrollRemainder = false;
+ ULO.ForgetAllSCEV = ForgetAllSCEV;
+ assert(!getLoopConvergenceHeart(L) &&
+ "A loop with a convergence heart does not allow runtime unrolling.");
+ UnrollResult = UnrollLoop(remainderLoop, ULO, LI, SE, DT, AC, TTI,
+ /*ORE*/ nullptr, PreserveLCSSA);
}
if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 0587468..d6b4acb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -274,6 +274,13 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}
+template <typename Op0_t, typename Op1_t>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Mul,
+ /* Commutative =*/true>
+m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_Binary<Instruction::Mul, Op0_t, Op1_t, true>(Op0, Op1);
+}
+
/// Match a binary OR operation. Note that while conceptually the operands can
/// be matched commutatively, \p Commutative defaults to false in line with the
/// IR-based pattern matching infrastructure. Use m_c_BinaryOr for a commutative
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ab3b5cf..8ec67eb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1037,8 +1037,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
- if (match(&R, m_CombineOr(m_Mul(m_VPValue(A), m_SpecificInt(1)),
- m_Mul(m_SpecificInt(1), m_VPValue(A)))))
+ if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
return R.getVPSingleValue()->replaceAllUsesWith(A);
}
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
index eb55e6a..ecf1332 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
@@ -267,7 +267,228 @@ for.end: ; preds = %for.body, %entry
ret void
}
+define void @ne_nsw_pos_step(ptr nocapture %A, i32 %n, i32 %s) mustprogress {
+;
+; CHECK-LABEL: 'ne_nsw_pos_step'
+; CHECK-NEXT: Determining loop execution counts for: @ne_nsw_pos_step
+; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+ %pos_step = icmp sgt i32 %s, 0
+ call void @llvm.assume(i1 %pos_step)
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+ %0 = load i32, ptr %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, ptr %arrayidx, align 4
+ %add = add nsw i32 %i.05, %s
+ %cmp = icmp ne i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @ne_nsw_neg_step(ptr nocapture %A, i32 %n, i32 %s) mustprogress {
+;
+; CHECK-LABEL: 'ne_nsw_neg_step'
+; CHECK-NEXT: Determining loop execution counts for: @ne_nsw_neg_step
+; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+ %neg_step = icmp slt i32 %s, 0
+ call void @llvm.assume(i1 %neg_step)
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+ %0 = load i32, ptr %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, ptr %arrayidx, align 4
+ %add = add nsw i32 %i.05, %s
+ %cmp = icmp ne i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @ne_nsw_nonneg_step(ptr nocapture %A, i32 %n, i32 %s) mustprogress {
+;
+; CHECK-LABEL: 'ne_nsw_nonneg_step'
+; CHECK-NEXT: Determining loop execution counts for: @ne_nsw_nonneg_step
+; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+ %nonneg_step = icmp sge i32 %s, 0
+ call void @llvm.assume(i1 %nonneg_step)
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+ %0 = load i32, ptr %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, ptr %arrayidx, align 4
+ %add = add nsw i32 %i.05, %s
+ %cmp = icmp ne i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @ne_nsw_unknown_step(ptr nocapture %A, i32 %n, i32 %s) mustprogress {
+;
+; CHECK-LABEL: 'ne_nsw_unknown_step'
+; CHECK-NEXT: Determining loop execution counts for: @ne_nsw_unknown_step
+; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+ %0 = load i32, ptr %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, ptr %arrayidx, align 4
+ %add = add nsw i32 %i.05, %s
+ %cmp = icmp ne i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @ne_nuw_pos_step(ptr nocapture %A, i32 %n, i32 %s) mustprogress {
+;
+; CHECK-LABEL: 'ne_nuw_pos_step'
+; CHECK-NEXT: Determining loop execution counts for: @ne_nuw_pos_step
+; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+ %pos_step = icmp sgt i32 %s, 0
+ call void @llvm.assume(i1 %pos_step)
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+ %0 = load i32, ptr %arrayidx, align 4
+ %inc = add nuw i32 %0, 1
+ store i32 %inc, ptr %arrayidx, align 4
+ %add = add nuw i32 %i.05, %s
+ %cmp = icmp ne i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @ne_nuw_neg_step(ptr nocapture %A, i32 %n, i32 %s) mustprogress {
+;
+; CHECK-LABEL: 'ne_nuw_neg_step'
+; CHECK-NEXT: Determining loop execution counts for: @ne_nuw_neg_step
+; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+ %neg_step = icmp slt i32 %s, 0
+ call void @llvm.assume(i1 %neg_step)
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+ %0 = load i32, ptr %arrayidx, align 4
+ %inc = add nuw i32 %0, 1
+ store i32 %inc, ptr %arrayidx, align 4
+ %add = add nuw i32 %i.05, %s
+ %cmp = icmp ne i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @ne_nuw_nonneg_step(ptr nocapture %A, i32 %n, i32 %s) mustprogress {
+;
+; CHECK-LABEL: 'ne_nuw_nonneg_step'
+; CHECK-NEXT: Determining loop execution counts for: @ne_nuw_nonneg_step
+; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+ %nonneg_step = icmp sge i32 %s, 0
+ call void @llvm.assume(i1 %nonneg_step)
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+ %0 = load i32, ptr %arrayidx, align 4
+ %inc = add nuw i32 %0, 1
+ store i32 %inc, ptr %arrayidx, align 4
+ %add = add nuw i32 %i.05, %s
+ %cmp = icmp ne i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @ne_nuw_unknown_step(ptr nocapture %A, i32 %n, i32 %s) mustprogress {
+;
+; CHECK-LABEL: 'ne_nuw_unknown_step'
+; CHECK-NEXT: Determining loop execution counts for: @ne_nuw_unknown_step
+; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+ %0 = load i32, ptr %arrayidx, align 4
+ %inc = add nuw i32 %0, 1
+ store i32 %inc, ptr %arrayidx, align 4
+ %add = add nuw i32 %i.05, %s
+ %cmp = icmp ne i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare void @llvm.assume(i1)
!8 = distinct !{!8, !9}
!9 = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-vscale.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-vscale.mir
new file mode 100644
index 0000000..9b7a449
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-vscale.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+...
+---
+name: sum_of_vscale
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: sum_of_vscale
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %sum:_(s64) = G_VSCALE i64 20
+ ; CHECK-NEXT: $x0 = COPY %sum(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_VSCALE i64 11
+ %lhs:_(s64) = G_VSCALE i64 9
+ %sum:_(s64) = nsw G_ADD %lhs(s64), %rhs(s64)
+ $x0 = COPY %sum(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: sum_of_vscale_multi_use
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: sum_of_vscale_multi_use
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %rhs:_(s64) = G_VSCALE i64 11
+ ; CHECK-NEXT: %lhs:_(s64) = G_VSCALE i64 9
+ ; CHECK-NEXT: %sum:_(s64) = nsw G_ADD %lhs, %rhs
+ ; CHECK-NEXT: $x0 = COPY %sum(s64)
+ ; CHECK-NEXT: $x1 = COPY %rhs(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_VSCALE i64 11
+ %lhs:_(s64) = G_VSCALE i64 9
+ %sum:_(s64) = nsw G_ADD %lhs(s64), %rhs(s64)
+ $x0 = COPY %sum(s64)
+ $x1 = COPY %rhs(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: mul_of_vscale
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: mul_of_vscale
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %mul:_(s64) = G_VSCALE i64 99
+ ; CHECK-NEXT: $x0 = COPY %mul(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_CONSTANT i64 11
+ %lhs:_(s64) = G_VSCALE i64 9
+ %mul:_(s64) = nsw G_MUL %lhs(s64), %rhs(s64)
+ $x0 = COPY %mul(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: sub_of_vscale
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: sub_of_vscale
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %x:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 -9
+ ; CHECK-NEXT: %sub:_(s64) = nsw G_ADD %x, [[VSCALE]]
+ ; CHECK-NEXT: $x0 = COPY %sub(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %x:_(s64) = COPY $x0
+ %rhs:_(s64) = G_VSCALE i64 9
+ %sub:_(s64) = nsw G_SUB %x(s64), %rhs(s64)
+ $x0 = COPY %sub(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: shl_of_vscale
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: shl_of_vscale
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %shl:_(s64) = G_VSCALE i64 44
+ ; CHECK-NEXT: $x0 = COPY %shl(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_CONSTANT i64 2
+ %lhs:_(s64) = G_VSCALE i64 11
+ %shl:_(s64) = nsw G_SHL %lhs(s64), %rhs(s64)
+ $x0 = COPY %shl(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: shl_of_vscale_wrong_flag
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: shl_of_vscale_wrong_flag
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %rhs:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: %lhs:_(s64) = G_VSCALE i64 11
+ ; CHECK-NEXT: %shl:_(s64) = nuw G_SHL %lhs, %rhs(s64)
+ ; CHECK-NEXT: $x0 = COPY %shl(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_CONSTANT i64 2
+ %lhs:_(s64) = G_VSCALE i64 11
+ %shl:_(s64) = nuw G_SHL %lhs(s64), %rhs(s64)
+ $x0 = COPY %shl(s64)
+ RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll b/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
index 3b6c4fa..dafdcf8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
@@ -12,7 +12,7 @@ entry:
for.body:
; CHECK: for.body
-; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}]
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x[[REG:[0-9]+]],
; CHECK: x[[REG]], #1, lsl #12
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
index 8c7b31f..114203e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
@@ -176,13 +176,13 @@ exit:
; CHECK: ********** MI Scheduling **********
; CHECK: LDURDi_LDRDui:%bb.1 vector_body
;
-; CHECK: Cluster ld/st SU(2) - SU(6)
-; CHECK: Cluster ld/st SU(3) - SU(7)
+; CHECK: Cluster ld/st SU(0) - SU(4)
+; CHECK: Cluster ld/st SU(1) - SU(5)
;
-; CHECK: SU(2): %{{[0-9]+}}:fpr64 = LDURDi
-; CHECK: SU(3): %{{[0-9]+}}:fpr64 = LDURDi
-; CHECK: SU(6): %{{[0-9]+}}:fpr64 = LDRDui
-; CHECK: SU(7): %{{[0-9]+}}:fpr64 = LDRDui
+; CHECK: SU(0): %{{[0-9]+}}:fpr64 = LDURDi
+; CHECK: SU(1): %{{[0-9]+}}:fpr64 = LDURDi
+; CHECK: SU(4): %{{[0-9]+}}:fpr64 = LDRDui
+; CHECK: SU(5): %{{[0-9]+}}:fpr64 = LDRDui
;
define void @LDURDi_LDRDui(ptr nocapture readonly %arg) {
entry:
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index ac2b21a..2ef3528 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -15,36 +15,34 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: mov w9, #100 // =0x64
-; CHECK-NEXT: cntd x10
-; CHECK-NEXT: whilelo p1.d, xzr, x9
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: rdvl x11, #2
+; CHECK-NEXT: mov w8, #100 // =0x64
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: rdvl x10, #2
+; CHECK-NEXT: mov x11, x9
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x12, x10
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
-; CHECK-NEXT: add x13, x0, x8
-; CHECK-NEXT: add x14, x1, x8
-; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
-; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
-; CHECK-NEXT: add x8, x8, x11
-; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13]
-; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14]
+; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
+; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
+; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: add x0, x0, x10
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT: mov z0.d, p2/m, z7.d
; CHECK-NEXT: mov z1.d, p1/m, z6.d
-; CHECK-NEXT: whilelo p1.d, x12, x9
-; CHECK-NEXT: add x12, x12, x10
+; CHECK-NEXT: whilelo p1.d, x11, x8
+; CHECK-NEXT: add x11, x11, x9
; CHECK-NEXT: b.mi .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -114,39 +112,37 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: cntd x10
-; CHECK-NEXT: mov w12, #100 // =0x64
-; CHECK-NEXT: neg x11, x10
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: mov w11, #100 // =0x64
+; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: mov x9, xzr
-; CHECK-NEXT: and x11, x11, x12
-; CHECK-NEXT: rdvl x12, #2
+; CHECK-NEXT: and x10, x10, x11
+; CHECK-NEXT: rdvl x11, #2
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x9, lsl #2]
-; CHECK-NEXT: add x13, x0, x8
-; CHECK-NEXT: add x14, x1, x8
+; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: add x9, x9, x10
-; CHECK-NEXT: add x8, x8, x12
-; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
-; CHECK-NEXT: cmp x11, x9
-; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
-; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
-; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
-; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
-; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
-; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
+; CHECK-NEXT: add x8, x8, x9
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: cmp x10, x8
+; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
+; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
+; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
+; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
+; CHECK-NEXT: add x1, x1, x11
+; CHECK-NEXT: add x0, x0, x11
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p1/m, z7.d
-; CHECK-NEXT: mov z1.d, p2/m, z6.d
+; CHECK-NEXT: mov z0.d, p2/m, z7.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -218,38 +214,38 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: mov w10, #100 // =0x64
+; CHECK-NEXT: mov w8, #100 // =0x64
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: rdvl x10, #2
+; CHECK-NEXT: cnth x11
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: whilelo p1.d, xzr, x10
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: mov x9, xzr
-; CHECK-NEXT: cntd x11
-; CHECK-NEXT: rdvl x12, #2
+; CHECK-NEXT: mov x12, x9
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2, x9, lsl #2]
-; CHECK-NEXT: add x13, x0, x8
-; CHECK-NEXT: add x14, x1, x8
+; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: add x9, x9, x11
-; CHECK-NEXT: add x8, x8, x12
-; CHECK-NEXT: cmpne p2.d, p1/z, z2.d, #0
-; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
-; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
-; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
-; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
-; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
-; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
+; CHECK-NEXT: add x2, x2, x11
+; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
+; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
+; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
+; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
+; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: add x0, x0, x10
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p1/m, z7.d
-; CHECK-NEXT: whilelo p1.d, x9, x10
-; CHECK-NEXT: mov z1.d, p2/m, z6.d
+; CHECK-NEXT: mov z0.d, p2/m, z7.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: whilelo p1.d, x12, x8
+; CHECK-NEXT: add x12, x12, x9
; CHECK-NEXT: b.mi .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index af07519..8e26ef6 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -15,30 +15,27 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: cntd x9
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: neg x10, x9
-; CHECK-NEXT: mov w11, #100 // =0x64
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: mov w10, #100 // =0x64
+; CHECK-NEXT: neg x9, x8
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: and x10, x10, x11
-; CHECK-NEXT: rdvl x11, #2
+; CHECK-NEXT: and x9, x9, x10
+; CHECK-NEXT: rdvl x10, #2
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x12, x0, x8
-; CHECK-NEXT: add x13, x1, x8
-; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
-; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
-; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
-; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
-; CHECK-NEXT: subs x10, x10, x9
-; CHECK-NEXT: add x8, x8, x11
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0]
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: ld1d { z4.d }, p0/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z5.d }, p0/z, [x1]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: add x0, x0, x10
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -103,34 +100,31 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov d0, #1.00000000
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: cntd x9
+; CHECK-NEXT: cntd x8
; CHECK-NEXT: fmov d2, #2.00000000
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: neg x10, x9
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: mov w11, #100 // =0x64
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: neg x9, x8
+; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: sel z3.d, p0, z0.d, z1.d
-; CHECK-NEXT: and x10, x10, x11
-; CHECK-NEXT: rdvl x11, #2
+; CHECK-NEXT: and x9, x9, x10
+; CHECK-NEXT: rdvl x10, #2
; CHECK-NEXT: mov z1.d, p0/m, z2.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: zip2 z0.d, z1.d, z3.d
; CHECK-NEXT: zip1 z1.d, z1.d, z3.d
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x12, x0, x8
-; CHECK-NEXT: add x13, x1, x8
-; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
-; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
-; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
-; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
-; CHECK-NEXT: subs x10, x10, x9
-; CHECK-NEXT: add x8, x8, x11
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0]
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: ld1d { z4.d }, p0/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z5.d }, p0/z, [x1]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: add x0, x0, x10
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -190,45 +184,37 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64_unrolled:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: cntw x9
-; CHECK-NEXT: mov w11, #1000 // =0x3e8
-; CHECK-NEXT: neg x10, x9
-; CHECK-NEXT: rdvl x12, #2
-; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: cntw x8
+; CHECK-NEXT: mov w10, #1000 // =0x3e8
+; CHECK-NEXT: neg x9, x8
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: and x10, x10, x11
+; CHECK-NEXT: and x9, x9, x10
+; CHECK-NEXT: rdvl x10, #4
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
-; CHECK-NEXT: add x11, x1, x12
-; CHECK-NEXT: add x12, x0, x12
-; CHECK-NEXT: rdvl x13, #4
; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: mov z3.d, z0.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x14, x0, x8
-; CHECK-NEXT: add x15, x12, x8
-; CHECK-NEXT: add x16, x1, x8
-; CHECK-NEXT: add x17, x11, x8
-; CHECK-NEXT: ld1b { z4.b }, p1/z, [x0, x8]
-; CHECK-NEXT: ld1d { z5.d }, p0/z, [x14, #1, mul vl]
-; CHECK-NEXT: ld1b { z6.b }, p1/z, [x12, x8]
-; CHECK-NEXT: ld1b { z7.b }, p1/z, [x1, x8]
-; CHECK-NEXT: ld1d { z16.d }, p0/z, [x16, #1, mul vl]
-; CHECK-NEXT: ld1d { z17.d }, p0/z, [x15, #1, mul vl]
-; CHECK-NEXT: ld1b { z18.b }, p1/z, [x11, x8]
-; CHECK-NEXT: ld1d { z19.d }, p0/z, [x17, #1, mul vl]
-; CHECK-NEXT: subs x10, x10, x9
-; CHECK-NEXT: add x8, x8, x13
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
-; CHECK-NEXT: fcmla z2.d, p0/m, z18.d, z6.d, #0
-; CHECK-NEXT: fcmla z3.d, p0/m, z19.d, z17.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90
-; CHECK-NEXT: fcmla z2.d, p0/m, z18.d, z6.d, #90
-; CHECK-NEXT: fcmla z3.d, p0/m, z19.d, z17.d, #90
+; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0]
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: ld1d { z6.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT: ld1d { z7.d }, p0/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z16.d }, p0/z, [x1]
+; CHECK-NEXT: ld1d { z17.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: add x0, x0, x10
+; CHECK-NEXT: ld1d { z18.d }, p0/z, [x1, #3, mul vl]
+; CHECK-NEXT: ld1d { z19.d }, p0/z, [x1, #2, mul vl]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
+; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
+; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
+; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
+; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
index 44d0a93..aed3072 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
@@ -148,17 +148,16 @@ define %"struct.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: add x8, x0, #32
+; CHECK-NEXT: add x9, x1, #32
+; CHECK-NEXT: mov x10, #-100 // =0xffffffffffffff9c
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: add x10, x1, x8
-; CHECK-NEXT: add x8, x8, #64
-; CHECK-NEXT: ldp q5, q4, [x9]
-; CHECK-NEXT: cmp x8, #1600
-; CHECK-NEXT: ldp q7, q6, [x10]
-; CHECK-NEXT: ldp q17, q16, [x9, #32]
-; CHECK-NEXT: ldp q19, q18, [x10, #32]
+; CHECK-NEXT: ldp q5, q4, [x8, #-32]
+; CHECK-NEXT: adds x10, x10, #4
+; CHECK-NEXT: ldp q7, q6, [x9, #-32]
+; CHECK-NEXT: ldp q17, q16, [x8], #64
+; CHECK-NEXT: ldp q19, q18, [x9], #64
; CHECK-NEXT: fcmla v2.2d, v7.2d, v5.2d, #0
; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #0
; CHECK-NEXT: fcmla v1.2d, v19.2d, v17.2d, #0
diff --git a/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll b/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
index 7535638..63c6533 100644
--- a/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
+++ b/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
@@ -25,6 +25,25 @@ define void @test_sme_calling_convention_x0() nounwind {
ret void
}
+define i64 @test_sme_calling_convention_x1() nounwind {
+; CHECK-LABEL: test_sme_calling_convention_x1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl __arm_get_current_vg
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+; DARWIN-LABEL: test_sme_calling_convention_x1:
+; DARWIN: stp x29, x30, [sp, #-16]!
+; DARWIN: bl ___arm_get_current_vg
+; DARWIN: ldp x29, x30, [sp], #16
+; DARWIN: ret
+;
+; CHECK-CSRMASK-LABEL: name: test_sme_calling_convention_x1
+; CHECK-CSRMASK: BL @__arm_get_current_vg, csr_aarch64_sme_abi_support_routines_preservemost_from_x1
+ %vg = call aarch64_sme_preservemost_from_x1 i64 @__arm_get_current_vg()
+ ret i64 %vg
+}
+
define i64 @test_sme_calling_convention_x2() nounwind {
; CHECK-LABEL: test_sme_calling_convention_x2:
; CHECK: // %bb.0:
@@ -46,4 +65,5 @@ define i64 @test_sme_calling_convention_x2() nounwind {
}
declare void @__arm_tpidr2_save()
+declare i64 @__arm_get_current_vg()
declare {i64, i64} @__arm_sme_state()
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 4c02a52..c993051 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -648,7 +648,19 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
-; CHECK-NEXT: b fmaxl
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: bl __gttf2
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: b.le .LBB18_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
%b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 18d40cb..0116be5 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -648,7 +648,19 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
-; CHECK-NEXT: b fminl
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: bl __lttf2
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: b.ge .LBB18_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
%b = call nnan fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
ret fp128 %b
}
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 599bd81..66bb131 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1669,42 +1669,41 @@ define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst)
; CHECK-LABEL: zext_v8i8_to_v8i64_with_add_in_sequence_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh18:
-; CHECK-NEXT: adrp x9, lCPI17_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI17_0@PAGE
; CHECK-NEXT: Lloh19:
-; CHECK-NEXT: adrp x10, lCPI17_1@PAGE
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: adrp x9, lCPI17_1@PAGE
+; CHECK-NEXT: mov w10, #128 ; =0x80
; CHECK-NEXT: Lloh20:
-; CHECK-NEXT: ldr q0, [x9, lCPI17_0@PAGEOFF]
+; CHECK-NEXT: ldr q0, [x8, lCPI17_0@PAGEOFF]
; CHECK-NEXT: Lloh21:
-; CHECK-NEXT: ldr q1, [x10, lCPI17_1@PAGEOFF]
+; CHECK-NEXT: ldr q1, [x9, lCPI17_1@PAGEOFF]
+; CHECK-NEXT: add x8, x1, #64
; CHECK-NEXT: add x9, x0, #8
; CHECK-NEXT: LBB17_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp d2, d3, [x9, #-8]
-; CHECK-NEXT: add x10, x1, x8
-; CHECK-NEXT: ldp q6, q5, [x10, #32]
-; CHECK-NEXT: add x8, x8, #128
-; CHECK-NEXT: ldp q17, q16, [x10]
-; CHECK-NEXT: cmp x8, #1024
+; CHECK-NEXT: subs x10, x10, #16
+; CHECK-NEXT: ldp q6, q5, [x8, #-32]
+; CHECK-NEXT: add x9, x9, #16
+; CHECK-NEXT: ldp q17, q16, [x8, #-64]
; CHECK-NEXT: tbl.16b v4, { v2 }, v1
; CHECK-NEXT: tbl.16b v2, { v2 }, v0
; CHECK-NEXT: tbl.16b v7, { v3 }, v1
; CHECK-NEXT: tbl.16b v3, { v3 }, v0
-; CHECK-NEXT: add x9, x9, #16
; CHECK-NEXT: uaddw2.2d v5, v5, v4
; CHECK-NEXT: uaddw.2d v4, v6, v4
; CHECK-NEXT: uaddw2.2d v6, v16, v2
-; CHECK-NEXT: ldp q18, q16, [x10, #96]
+; CHECK-NEXT: ldp q18, q16, [x8, #32]
; CHECK-NEXT: uaddw.2d v2, v17, v2
-; CHECK-NEXT: stp q4, q5, [x10, #32]
+; CHECK-NEXT: stp q4, q5, [x8, #-32]
; CHECK-NEXT: uaddw2.2d v5, v16, v7
-; CHECK-NEXT: ldp q16, q4, [x10, #64]
+; CHECK-NEXT: ldp q16, q4, [x8]
; CHECK-NEXT: uaddw.2d v7, v18, v7
-; CHECK-NEXT: stp q2, q6, [x10]
+; CHECK-NEXT: stp q2, q6, [x8, #-64]
; CHECK-NEXT: uaddw2.2d v4, v4, v3
; CHECK-NEXT: uaddw.2d v2, v16, v3
-; CHECK-NEXT: stp q7, q5, [x10, #96]
-; CHECK-NEXT: stp q2, q4, [x10, #64]
+; CHECK-NEXT: stp q7, q5, [x8, #32]
+; CHECK-NEXT: stp q2, q4, [x8], #128
; CHECK-NEXT: b.ne LBB17_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -1715,67 +1714,67 @@ define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst)
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: adrp x9, .LCPI17_0
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI17_0
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
; CHECK-BE-NEXT: adrp x9, .LCPI17_1
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI17_1
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x0, #8
+; CHECK-BE-NEXT: add x9, x1, #64
+; CHECK-BE-NEXT: add x10, x0, #8
; CHECK-BE-NEXT: .LBB17_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: sub x10, x9, #8
-; CHECK-BE-NEXT: ld1 { v2.8b }, [x9]
-; CHECK-BE-NEXT: add x9, x9, #16
-; CHECK-BE-NEXT: ld1 { v3.8b }, [x10]
-; CHECK-BE-NEXT: add x10, x1, x8
-; CHECK-BE-NEXT: add x8, x8, #128
-; CHECK-BE-NEXT: add x11, x10, #32
-; CHECK-BE-NEXT: add x14, x10, #64
-; CHECK-BE-NEXT: add x15, x10, #96
+; CHECK-BE-NEXT: ld1 { v2.8b }, [x10]
+; CHECK-BE-NEXT: sub x11, x10, #8
+; CHECK-BE-NEXT: add x15, x9, #32
+; CHECK-BE-NEXT: ld1 { v3.8b }, [x11]
+; CHECK-BE-NEXT: ld1 { v16.2d }, [x15]
+; CHECK-BE-NEXT: sub x11, x9, #64
+; CHECK-BE-NEXT: sub x12, x9, #32
+; CHECK-BE-NEXT: ld1 { v6.2d }, [x9]
+; CHECK-BE-NEXT: ld1 { v21.2d }, [x11]
; CHECK-BE-NEXT: tbl v4.16b, { v2.16b }, v1.16b
; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v0.16b
-; CHECK-BE-NEXT: ld1 { v5.2d }, [x10]
-; CHECK-BE-NEXT: tbl v6.16b, { v3.16b }, v1.16b
+; CHECK-BE-NEXT: ld1 { v19.2d }, [x12]
+; CHECK-BE-NEXT: tbl v5.16b, { v3.16b }, v1.16b
; CHECK-BE-NEXT: tbl v3.16b, { v3.16b }, v0.16b
-; CHECK-BE-NEXT: ld1 { v16.2d }, [x15]
-; CHECK-BE-NEXT: ld1 { v19.2d }, [x14]
-; CHECK-BE-NEXT: ld1 { v21.2d }, [x11]
-; CHECK-BE-NEXT: add x12, x10, #48
-; CHECK-BE-NEXT: add x13, x10, #16
-; CHECK-BE-NEXT: add x16, x10, #112
-; CHECK-BE-NEXT: add x17, x10, #80
+; CHECK-BE-NEXT: sub x13, x9, #16
+; CHECK-BE-NEXT: sub x14, x9, #48
+; CHECK-BE-NEXT: add x16, x9, #48
+; CHECK-BE-NEXT: add x17, x9, #16
+; CHECK-BE-NEXT: ld1 { v22.2d }, [x13]
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x10, x10, #16
; CHECK-BE-NEXT: rev32 v7.8b, v4.8b
; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8
; CHECK-BE-NEXT: rev32 v17.8b, v2.8b
-; CHECK-BE-NEXT: ext v18.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT: ext v18.16b, v5.16b, v5.16b, #8
; CHECK-BE-NEXT: ext v20.16b, v3.16b, v3.16b, #8
; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT: rev32 v6.8b, v6.8b
+; CHECK-BE-NEXT: rev32 v5.8b, v5.8b
; CHECK-BE-NEXT: rev32 v3.8b, v3.8b
-; CHECK-BE-NEXT: ld1 { v22.2d }, [x12]
-; CHECK-BE-NEXT: cmp x8, #1024
-; CHECK-BE-NEXT: rev32 v4.8b, v4.8b
; CHECK-BE-NEXT: uaddw v7.2d, v16.2d, v7.2s
-; CHECK-BE-NEXT: ld1 { v16.2d }, [x16]
-; CHECK-BE-NEXT: rev32 v18.8b, v18.8b
+; CHECK-BE-NEXT: rev32 v4.8b, v4.8b
+; CHECK-BE-NEXT: uaddw v6.2d, v6.2d, v17.2s
+; CHECK-BE-NEXT: rev32 v17.8b, v18.8b
; CHECK-BE-NEXT: rev32 v20.8b, v20.8b
; CHECK-BE-NEXT: rev32 v2.8b, v2.8b
-; CHECK-BE-NEXT: uaddw v17.2d, v19.2d, v17.2s
-; CHECK-BE-NEXT: ld1 { v19.2d }, [x13]
-; CHECK-BE-NEXT: uaddw v6.2d, v21.2d, v6.2s
-; CHECK-BE-NEXT: uaddw v3.2d, v5.2d, v3.2s
-; CHECK-BE-NEXT: ld1 { v5.2d }, [x17]
+; CHECK-BE-NEXT: ld1 { v16.2d }, [x16]
+; CHECK-BE-NEXT: ld1 { v18.2d }, [x14]
+; CHECK-BE-NEXT: uaddw v5.2d, v19.2d, v5.2s
+; CHECK-BE-NEXT: uaddw v3.2d, v21.2d, v3.2s
; CHECK-BE-NEXT: st1 { v7.2d }, [x15]
+; CHECK-BE-NEXT: ld1 { v7.2d }, [x17]
+; CHECK-BE-NEXT: st1 { v6.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x9, #128
; CHECK-BE-NEXT: uaddw v4.2d, v16.2d, v4.2s
-; CHECK-BE-NEXT: st1 { v6.2d }, [x11]
-; CHECK-BE-NEXT: uaddw v6.2d, v22.2d, v18.2s
-; CHECK-BE-NEXT: st1 { v3.2d }, [x10]
-; CHECK-BE-NEXT: uaddw v3.2d, v19.2d, v20.2s
-; CHECK-BE-NEXT: uaddw v2.2d, v5.2d, v2.2s
-; CHECK-BE-NEXT: st1 { v17.2d }, [x14]
+; CHECK-BE-NEXT: st1 { v5.2d }, [x12]
+; CHECK-BE-NEXT: uaddw v5.2d, v22.2d, v17.2s
+; CHECK-BE-NEXT: st1 { v3.2d }, [x11]
+; CHECK-BE-NEXT: uaddw v3.2d, v18.2d, v20.2s
+; CHECK-BE-NEXT: uaddw v2.2d, v7.2d, v2.2s
; CHECK-BE-NEXT: st1 { v4.2d }, [x16]
-; CHECK-BE-NEXT: st1 { v6.2d }, [x12]
-; CHECK-BE-NEXT: st1 { v3.2d }, [x13]
+; CHECK-BE-NEXT: st1 { v5.2d }, [x13]
+; CHECK-BE-NEXT: st1 { v3.2d }, [x14]
; CHECK-BE-NEXT: st1 { v2.2d }, [x17]
; CHECK-BE-NEXT: b.ne .LBB17_1
; CHECK-BE-NEXT: // %bb.2: // %exit
@@ -1813,14 +1812,14 @@ exit:
define void @zext_v16i8_to_v16i64_in_sequence_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v16i8_to_v16i64_in_sequence_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: add x9, x1, #128
+; CHECK-NEXT: add x10, x0, #16
; CHECK-NEXT: LBB18_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x10, x0, x8
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: ldp q0, q1, [x10]
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldp q0, q1, [x10, #-16]
+; CHECK-NEXT: subs x8, x8, #16
+; CHECK-NEXT: add x10, x10, #16
; CHECK-NEXT: ushll2.8h v2, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.8h v6, v1, #0
@@ -1863,18 +1862,18 @@ define void @zext_v16i8_to_v16i64_in_sequence_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_sequence_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: add x9, x1, #128
+; CHECK-BE-NEXT: add x10, x0, #16
; CHECK-BE-NEXT: .LBB18_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x10, x0, x8
-; CHECK-BE-NEXT: sub x11, x9, #32
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.16b }, [x10]
-; CHECK-BE-NEXT: add x10, x10, #16
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: sub x11, x10, #16
; CHECK-BE-NEXT: ld1 { v5.16b }, [x10]
-; CHECK-BE-NEXT: sub x10, x9, #16
+; CHECK-BE-NEXT: sub x12, x9, #32
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x11]
+; CHECK-BE-NEXT: sub x11, x9, #16
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x10, x10, #16
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
@@ -1885,54 +1884,54 @@ define void @zext_v16i8_to_v16i64_in_sequence_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-BE-NEXT: ushll2 v6.2d, v1.4s, #0
; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-BE-NEXT: st1 { v4.2d }, [x10]
+; CHECK-BE-NEXT: st1 { v4.2d }, [x11]
; CHECK-BE-NEXT: ushll2 v4.2d, v3.4s, #0
; CHECK-BE-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-BE-NEXT: st1 { v2.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v2.2d }, [x12]
; CHECK-BE-NEXT: ushll2 v2.8h, v5.16b, #0
-; CHECK-BE-NEXT: sub x11, x9, #80
-; CHECK-BE-NEXT: sub x10, x9, #48
-; CHECK-BE-NEXT: st1 { v4.2d }, [x11]
+; CHECK-BE-NEXT: sub x12, x9, #80
+; CHECK-BE-NEXT: sub x11, x9, #48
+; CHECK-BE-NEXT: st1 { v4.2d }, [x12]
; CHECK-BE-NEXT: ushll v4.8h, v5.8b, #0
-; CHECK-BE-NEXT: sub x11, x9, #64
+; CHECK-BE-NEXT: sub x12, x9, #64
; CHECK-BE-NEXT: ushll2 v5.4s, v2.8h, #0
-; CHECK-BE-NEXT: st1 { v1.2d }, [x11]
-; CHECK-BE-NEXT: sub x11, x9, #96
+; CHECK-BE-NEXT: st1 { v1.2d }, [x12]
+; CHECK-BE-NEXT: sub x12, x9, #96
; CHECK-BE-NEXT: ushll2 v1.2d, v0.4s, #0
; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BE-NEXT: st1 { v6.2d }, [x10]
-; CHECK-BE-NEXT: sub x10, x9, #128
-; CHECK-BE-NEXT: st1 { v3.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v6.2d }, [x11]
+; CHECK-BE-NEXT: sub x11, x9, #128
+; CHECK-BE-NEXT: st1 { v3.2d }, [x12]
; CHECK-BE-NEXT: ushll2 v3.4s, v4.8h, #0
; CHECK-BE-NEXT: ushll2 v6.2d, v5.4s, #0
-; CHECK-BE-NEXT: sub x11, x9, #112
+; CHECK-BE-NEXT: sub x12, x9, #112
; CHECK-BE-NEXT: ushll v5.2d, v5.2s, #0
-; CHECK-BE-NEXT: st1 { v0.2d }, [x10]
-; CHECK-BE-NEXT: st1 { v1.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v0.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v1.2d }, [x12]
; CHECK-BE-NEXT: ushll2 v1.2d, v2.4s, #0
-; CHECK-BE-NEXT: add x10, x9, #112
+; CHECK-BE-NEXT: add x11, x9, #112
; CHECK-BE-NEXT: ushll v4.4s, v4.4h, #0
; CHECK-BE-NEXT: ushll2 v0.2d, v3.4s, #0
-; CHECK-BE-NEXT: st1 { v6.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #96
+; CHECK-BE-NEXT: st1 { v6.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #96
; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-BE-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-BE-NEXT: st1 { v5.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #80
-; CHECK-BE-NEXT: st1 { v1.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #48
+; CHECK-BE-NEXT: st1 { v5.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #80
+; CHECK-BE-NEXT: st1 { v1.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #48
; CHECK-BE-NEXT: ushll2 v1.2d, v4.4s, #0
-; CHECK-BE-NEXT: st1 { v0.2d }, [x10]
+; CHECK-BE-NEXT: st1 { v0.2d }, [x11]
; CHECK-BE-NEXT: ushll v0.2d, v4.2s, #0
-; CHECK-BE-NEXT: add x10, x9, #64
-; CHECK-BE-NEXT: st1 { v2.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #32
-; CHECK-BE-NEXT: st1 { v3.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #16
+; CHECK-BE-NEXT: add x11, x9, #64
+; CHECK-BE-NEXT: st1 { v2.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #32
+; CHECK-BE-NEXT: st1 { v3.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #16
; CHECK-BE-NEXT: st1 { v0.2d }, [x9]
; CHECK-BE-NEXT: add x9, x9, #128
-; CHECK-BE-NEXT: st1 { v1.2d }, [x10]
+; CHECK-BE-NEXT: st1 { v1.2d }, [x11]
; CHECK-BE-NEXT: b.ne .LBB18_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index c347437..40d77a7 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -105,6 +105,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 < %s | FileCheck --check-prefixes=GFX1103 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1150 < %s | FileCheck --check-prefixes=GFX1150 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 < %s | FileCheck --check-prefixes=GFX1151 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1152 < %s | FileCheck --check-prefixes=GFX1152 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s
@@ -201,6 +202,7 @@
; GFX1103: .amdgcn_target "amdgcn-amd-amdhsa--gfx1103"
; GFX1150: .amdgcn_target "amdgcn-amd-amdhsa--gfx1150"
; GFX1151: .amdgcn_target "amdgcn-amd-amdhsa--gfx1151"
+; GFX1152: .amdgcn_target "amdgcn-amd-amdhsa--gfx1152"
; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201"
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index edc2015..560a05a 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -74,6 +74,7 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1103 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1103 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1150 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1150 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1151 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1151 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1152 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1152 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s
@@ -153,6 +154,7 @@
; GFX1103: EF_AMDGPU_MACH_AMDGCN_GFX1103 (0x44)
; GFX1150: EF_AMDGPU_MACH_AMDGCN_GFX1150 (0x43)
; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
+; GFX1152: EF_AMDGPU_MACH_AMDGCN_GFX1152 (0x55)
; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
diff --git a/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll b/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll
new file mode 100644
index 0000000..ce55558
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll
@@ -0,0 +1,545 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+; REQUIRES: amdgpu-registered-target
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+target triple = "amdgcn-amd-amdhsa"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_libcS.vararg = type <{ i32, %struct.libcS }>
+; CHECK: %libcS_i32.vararg = type <{ %struct.libcS, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+; CHECK: %fptr_libcS.vararg = type <{ %struct.libcS }>
+
+%struct.libcS = type { i8, i16, i32, i64, float, double }
+
+@vararg_ptr = hidden addrspace(1) global ptr @vararg, align 8
+
+define hidden void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va.addr = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %cp = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %va.addr.ascast = addrspacecast ptr addrspace(5) %va.addr to ptr
+; CHECK-NEXT: %cp.ascast = addrspacecast ptr addrspace(5) %cp to ptr
+; CHECK-NEXT: store ptr %va, ptr addrspace(5) %va.addr, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %cp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %cp.ascast, ptr %va.addr.ascast, i32 8, i1 false)
+; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %cp, align 8
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 8, addrspace(5)
+ %cp = alloca ptr, align 8, addrspace(5)
+ %va.addr.ascast = addrspacecast ptr addrspace(5) %va.addr to ptr
+ %cp.ascast = addrspacecast ptr addrspace(5) %cp to ptr
+ store ptr %va, ptr addrspace(5) %va.addr, align 8
+ call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %cp)
+ call void @llvm.va_copy.p0(ptr %cp.ascast, ptr nonnull %va.addr.ascast)
+ %0 = load ptr, ptr addrspace(5) %cp, align 8
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare hidden void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture)
+
+define hidden void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %s.ascast = addrspacecast ptr addrspace(5) %s to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s)
+; CHECK-NEXT: store ptr %varargs, ptr %s.ascast, align 8
+; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %s, align 8
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 8, addrspace(5)
+ %s.ascast = addrspacecast ptr addrspace(5) %s to ptr
+ call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s)
+ call void @llvm.va_start.p0(ptr %s.ascast)
+ %0 = load ptr, ptr addrspace(5) %s, align 8
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s.ascast)
+ call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define hidden void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %s1 = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %s0.ascast = addrspacecast ptr addrspace(5) %s0 to ptr
+; CHECK-NEXT: %s1.ascast = addrspacecast ptr addrspace(5) %s1 to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s1)
+; CHECK-NEXT: store ptr %varargs, ptr %s0.ascast, align 8
+; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %s0, align 8
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: store ptr %varargs, ptr %s1.ascast, align 8
+; CHECK-NEXT: %1 = load ptr, ptr addrspace(5) %s1, align 8
+; CHECK-NEXT: call void @valist(ptr noundef %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 8, addrspace(5)
+ %s1 = alloca ptr, align 8, addrspace(5)
+ %s0.ascast = addrspacecast ptr addrspace(5) %s0 to ptr
+ %s1.ascast = addrspacecast ptr addrspace(5) %s1 to ptr
+ call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s0)
+ call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s1)
+ call void @llvm.va_start.p0(ptr %s0.ascast)
+ %0 = load ptr, ptr addrspace(5) %s0, align 8
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0.ascast)
+ call void @llvm.va_start.p0(ptr %s1.ascast)
+ %1 = load ptr, ptr addrspace(5) %s1, align 8
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1.ascast)
+ call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s1)
+ call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s0)
+ ret void
+}
+
+define hidden void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare hidden void @vararg(...)
+
+define hidden void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr addrspace(5) %0, align 8
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+define hidden void @single_v4f32(<4 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr addrspace(5) %0, align 16
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v8f32(<8 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 32, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr addrspace(5) %0, align 32
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 32, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v16f32(<16 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 64, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr addrspace(5) %0, align 64
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 64, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v32f32(<32 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 128, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr addrspace(5) %0, align 128
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 128, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x)
+ ret void
+}
+
+define hidden void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 12, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr addrspace(5) %1, align 8
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 12, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+define hidden void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 12, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr addrspace(5) %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 12, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_libcS(i32 noundef %x, i8 %y.coerce0, i16 %y.coerce1, i32 %y.coerce2, i64 %y.coerce3, float %y.coerce4, double %y.coerce5) {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, i8 %y.coerce0, i16 %y.coerce1, i32 %y.coerce2, i64 %y.coerce3, float %y.coerce4, double %y.coerce5) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 4, addrspace(5)
+; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %y.coerce0, 0
+; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %y.coerce1, 1
+; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %y.coerce2, 2
+; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %y.coerce3, 3
+; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %y.coerce4, 4
+; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %y.coerce5, 5
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %1, align 8
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %.fca.0.insert = insertvalue %struct.libcS poison, i8 %y.coerce0, 0
+ %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %y.coerce1, 1
+ %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %y.coerce2, 2
+ %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %y.coerce3, 3
+ %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %y.coerce4, 4
+ %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %y.coerce5, 5
+ tail call void (...) @vararg(i32 noundef %x, %struct.libcS %.fca.5.insert)
+ ret void
+}
+
+define hidden void @libcS_i32(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0
+; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1
+; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2
+; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3
+; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4
+; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0
+ %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1
+ %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2
+ %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3
+ %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4
+ %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5
+ tail call void (...) @vararg(%struct.libcS %.fca.5.insert, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store <4 x float> %y, ptr addrspace(5) %1, align 16
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr addrspace(5) %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store <8 x float> %y, ptr addrspace(5) %1, align 32
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr addrspace(5) %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 68, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store <16 x float> %y, ptr addrspace(5) %1, align 64
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 68, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 68, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr addrspace(5) %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 68, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 132, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store <32 x float> %y, ptr addrspace(5) %1, align 128
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 132, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 132, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr addrspace(5) %0, align 128
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 132, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void %0(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_libcS(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_libcS.vararg, align 4, addrspace(5)
+; CHECK-NEXT: %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8
+; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0
+; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1
+; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2
+; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3
+; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4
+; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 32, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %1, align 8
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void %0(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 32, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8
+ %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0
+ %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1
+ %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2
+ %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3
+ %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4
+ %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5
+ tail call void (...) %0(%struct.libcS %.fca.5.insert)
+ ret void
+}
+
+
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 3ec36f0..9ce1ba3 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -497,47 +497,19 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX9-LABEL: v_fmaximum3_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v5, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v5, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0)
@@ -559,47 +531,19 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX9-LABEL: v_fmaximum3_v2f32_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v0, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -621,47 +565,19 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX9-LABEL: v_fmaximum3_v2f32__fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e64 v6, |v1|, |v3|
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, |v6|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, |v1|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, |v2|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v2|
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, |v0|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v2|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, |v4|
-; GFX9-NEXT: v_cndmask_b32_e64 v2, |v4|, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v2|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v2, v0, |v4|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, |v5|, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_max_f32_e64 v2, v1, |v5|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
%b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
@@ -686,47 +602,19 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX9-LABEL: v_fmaximum3_v2f32__fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e64 v6, -v1, -v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, -v6, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, -v1, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v0, -v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v2
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, -v0, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v2, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, -v4
-; GFX9-NEXT: v_cndmask_b32_e64 v2, -v4, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v2
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v2, v0, -v4
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, -v5, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_max_f32_e64 v2, v1, -v5
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
%b.fneg = fneg <2 x float> %b
@@ -751,35 +639,19 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v1
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -801,33 +673,17 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -852,67 +708,25 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX9-LABEL: v_fmaximum3_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v6, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v6, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v7, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v7, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v8, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0)
@@ -935,67 +749,25 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX9-LABEL: v_fmaximum3_v3f32_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v0, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v1, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v2, v8
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1018,67 +790,25 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX9-LABEL: v_fmaximum3_v3f32__fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e64 v9, |v2|, |v5|
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, |v9|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, |v2|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, |v1|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, |v0|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, |v6|
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v6|, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_max_f32_e64 v5, |v1|, |v4|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_max_f32_e64 v4, |v0|, |v3|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v0, |v6|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v6|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v6|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, |v7|
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v7|, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v1, |v7|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v7|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v7|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v2, |v8|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v8|, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v2, |v8|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v8|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v8|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
%b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
@@ -1104,67 +834,25 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX9-LABEL: v_fmaximum3_v3f32__fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e64 v9, -v2, -v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, -v9, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, -v2, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, -v1, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v0, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v3
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, -v0, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, -v6
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v6, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_max_f32_e64 v5, -v1, -v4
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_max_f32_e64 v4, -v0, -v3
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v0, -v6
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v6, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v6, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, -v7
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v7, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v1, -v7
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v7, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v7, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v2, -v8
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v8, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v2, -v8
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v8, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v8, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
%b.fneg = fneg <3 x float> %b
@@ -1190,49 +878,25 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1255,47 +919,23 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 0e0b73b..21074d5 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -497,47 +497,19 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX9-LABEL: v_fminimum3_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v5, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v5, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %c, <2 x float> %max0)
@@ -559,47 +531,19 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX9-LABEL: v_fminimum3_v2f32_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v0, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -621,47 +565,19 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX9-LABEL: v_fminimum3_v2f32__fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e64 v6, |v1|, |v3|
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, |v6|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, |v1|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, |v2|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v2|
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, |v0|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v2|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v0, |v4|
-; GFX9-NEXT: v_cndmask_b32_e64 v2, |v4|, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v2|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v2, v0, |v4|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, |v5|, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_min_f32_e64 v2, v1, |v5|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
%b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
@@ -686,47 +602,19 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX9-LABEL: v_fminimum3_v2f32__fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e64 v6, -v1, -v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, -v6, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, -v1, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v0, -v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v2
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, -v0, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v2, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v0, -v4
-; GFX9-NEXT: v_cndmask_b32_e64 v2, -v4, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v2
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v2, v0, -v4
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, -v5, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_min_f32_e64 v2, v1, -v5
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
%b.fneg = fneg <2 x float> %b
@@ -751,35 +639,19 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX9-LABEL: v_fminimum3_v2f32__inlineimm1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v1
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -801,33 +673,17 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX9-LABEL: v_fminimum3_v2f32__inlineimm2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -852,67 +708,25 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX9-LABEL: v_fminimum3_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v6, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v6, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v7, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v7, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v8, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %c, <3 x float> %max0)
@@ -935,67 +749,25 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX9-LABEL: v_fminimum3_v3f32_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v0, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v1, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v2, v8
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1018,67 +790,25 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX9-LABEL: v_fminimum3_v3f32__fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e64 v9, |v2|, |v5|
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, |v9|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, |v2|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, |v1|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, |v0|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v0, |v6|
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v6|, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_min_f32_e64 v5, |v1|, |v4|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_min_f32_e64 v4, |v0|, |v3|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v0, |v6|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v6|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v6|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, |v7|
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v7|, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v1, |v7|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v7|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v7|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v2, |v8|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v8|, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v2, |v8|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v8|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v8|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
%b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
@@ -1104,67 +834,25 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX9-LABEL: v_fminimum3_v3f32__fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e64 v9, -v2, -v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, -v9, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, -v2, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, -v1, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v0, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v3
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, -v0, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v0, -v6
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v6, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_min_f32_e64 v5, -v1, -v4
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_min_f32_e64 v4, -v0, -v3
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v0, -v6
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v6, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v6, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, -v7
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v7, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v1, -v7
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v7, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v7, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v2, -v8
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v8, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v2, -v8
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v8, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v8, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
%b.fneg = fneg <3 x float> %b
@@ -1190,49 +878,25 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX9-LABEL: v_fminimum3_v3f32__inlineimm1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1255,47 +919,23 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX9-LABEL: v_fminimum3_v3f32__inlineimm2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 0db88d1..08cf83f 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -31,6 +31,7 @@
; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O0-NEXT: AMDGPU Printf lowering
; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O0-NEXT: Expand variadic functions
; GCN-O0-NEXT: AMDGPU Inline All Functions
; GCN-O0-NEXT: Inliner for always_inline functions
; GCN-O0-NEXT: FunctionPass Manager
@@ -178,6 +179,7 @@
; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O1-NEXT: AMDGPU Printf lowering
; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O1-NEXT: Expand variadic functions
; GCN-O1-NEXT: AMDGPU Inline All Functions
; GCN-O1-NEXT: Inliner for always_inline functions
; GCN-O1-NEXT: FunctionPass Manager
@@ -454,6 +456,7 @@
; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering
; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O1-OPTS-NEXT: Expand variadic functions
; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions
; GCN-O1-OPTS-NEXT: Inliner for always_inline functions
; GCN-O1-OPTS-NEXT: FunctionPass Manager
@@ -760,6 +763,7 @@
; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: AMDGPU Image Intrinsic Optimizer
+; GCN-O2-NEXT: Expand variadic functions
; GCN-O2-NEXT: AMDGPU Inline All Functions
; GCN-O2-NEXT: Inliner for always_inline functions
; GCN-O2-NEXT: FunctionPass Manager
@@ -1070,6 +1074,7 @@
; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: AMDGPU Image Intrinsic Optimizer
+; GCN-O3-NEXT: Expand variadic functions
; GCN-O3-NEXT: AMDGPU Inline All Functions
; GCN-O3-NEXT: Inliner for always_inline functions
; GCN-O3-NEXT: FunctionPass Manager
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index 7d7a462..fa7ee9e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -554,28 +554,14 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
+; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -654,46 +640,24 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX7-LABEL: v_maximum_v2f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f16__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
-; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f16__nnan:
@@ -759,13 +723,11 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
+; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
@@ -847,34 +809,24 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX7-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f16__nnan_nsz:
@@ -948,31 +900,15 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-NEXT: s_lshr_b32 s6, s5, 16
; GFX8-NEXT: s_lshr_b32 s7, s4, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NEXT: v_mov_b32_e32 v1, s7
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, s7, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX8-NEXT: v_max_f16_e32 v1, s7, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s7, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v2
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX8-NEXT: v_mov_b32_e32 v1, s5
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, s4, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v1, v2, vcc
+; GFX8-NEXT: v_max_f16_e32 v3, s4, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s4, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
@@ -1216,28 +1152,21 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX7-LABEL: v_maximum_v3f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f16__nnan:
@@ -1427,28 +1356,21 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX7-LABEL: v_maximum_v3f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f16__nnan_nsz:
@@ -1671,35 +1593,26 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX7-LABEL: v_maximum_v4f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f16__nnan:
@@ -1924,35 +1837,26 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX7-LABEL: v_maximum_v4f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f16__nnan_nsz:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index 7c5bc7d..f4aa40d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -495,167 +495,73 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX7-LABEL: v_maximum_v2f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v1, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX8-NEXT: v_max_f32_e32 v2, v1, v3
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v5, v1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32:
@@ -676,136 +582,42 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX7-LABEL: v_maximum_v2f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v0, v2
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v1, v3
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32__nnan:
@@ -826,11 +638,11 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX7-LABEL: v_maximum_v2f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v1, v3
+; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -838,13 +650,11 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX8-LABEL: v_maximum_v2f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v2, v1, v3
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -852,13 +662,11 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX9-LABEL: v_maximum_v2f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -866,16 +674,12 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX940-LABEL: v_maximum_v2f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -884,11 +688,9 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX10-LABEL: v_maximum_v2f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
+; GFX10-NEXT: v_max_f32_e32 v5, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
@@ -897,12 +699,9 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX11-LABEL: v_maximum_v2f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
@@ -926,55 +725,42 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX7-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32__nnan_nsz:
@@ -996,28 +782,14 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s7
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, s5, v0
+; GFX7-NEXT: v_max_f32_e32 v1, s5, v0
; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX7-NEXT: v_mov_b32_e32 v0, s6
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, s4, v0
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_max_f32_e32 v3, s4, v0
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX7-NEXT: v_mov_b32_e32 v3, s4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use v[0:1]
; GFX7-NEXT: ;;#ASMEND
@@ -1027,30 +799,14 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s7
-; GFX8-NEXT: v_mov_b32_e32 v1, s5
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s5, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX8-NEXT: v_max_f32_e32 v1, s5, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX8-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, s4, v0
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
@@ -1060,30 +816,14 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s7
-; GFX9-NEXT: v_mov_b32_e32 v1, s5
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s5, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-NEXT: v_max_f32_e32 v1, s5, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX9-NEXT: v_mov_b32_e32 v0, s6
-; GFX9-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, s4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
@@ -1093,40 +833,15 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v0, s3
-; GFX940-NEXT: v_mov_b32_e32 v1, s1
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, s1, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX940-NEXT: v_mov_b32_e32 v0, s2
-; GFX940-NEXT: v_mov_b32_e32 v2, s0
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, s0, v0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use v[0:1]
; GFX940-NEXT: ;;#ASMEND
@@ -1135,28 +850,12 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX10-LABEL: s_maximum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s5
-; GFX10-NEXT: v_cmp_gt_f32_e64 vcc_lo, s5, s7
-; GFX10-NEXT: v_mov_b32_e32 v1, s4
-; GFX10-NEXT: v_cmp_class_f32_e64 s8, s5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, s7, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e64 vcc_lo, s4, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e64 v0, s5, s7
; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s5, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX10-NEXT: v_max_f32_e64 v2, s4, s6
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s4, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v0, s5, s8
-; GFX10-NEXT: v_cmp_class_f32_e64 s5, s4, 64
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v2, s4, s5
-; GFX10-NEXT: v_cmp_class_f32_e64 s4, s7, 64
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s7, s4
-; GFX10-NEXT: v_cmp_class_f32_e64 s4, s6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s6, s4
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v[0:1]
; GFX10-NEXT: ;;#ASMEND
@@ -1165,32 +864,13 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX11-LABEL: s_maximum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
-; GFX11-NEXT: v_cmp_gt_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT: v_cmp_class_f32_e64 s4, s1, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
+; GFX11-NEXT: v_max_f32_e64 v0, s1, s3
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT: v_max_f32_e64 v2, s0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v0, s1, s4
-; GFX11-NEXT: v_cmp_class_f32_e64 s1, s0, 64
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v2, s0, s1
-; GFX11-NEXT: v_cmp_class_f32_e64 s0, s3, 64
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0
-; GFX11-NEXT: v_cmp_class_f32_e64 s0, s2, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s2, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v[0:1]
; GFX11-NEXT: ;;#ASMEND
@@ -1218,227 +898,92 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX7-LABEL: v_maximum_v3f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v6, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v1, v4
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v2, v5
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v6, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, v1, v4
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, v2, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v6, v0, v3
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v6, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v7, v1, v4
+; GFX10-NEXT: v_max_f32_e32 v8, v2, v5
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v6, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v7, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32:
@@ -1460,184 +1005,48 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX7-LABEL: v_maximum_v3f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v6, v0, v3
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v1, v4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v2, v5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
+; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32__nnan:
@@ -1659,14 +1068,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX7-LABEL: v_maximum_v3f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v6, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v2, v5
+; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -1674,17 +1083,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX8-LABEL: v_maximum_v3f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v6, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, v1, v4
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, v2, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -1692,17 +1098,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX9-LABEL: v_maximum_v3f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -1710,22 +1113,16 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX940-LABEL: v_maximum_v3f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
@@ -1734,13 +1131,10 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX10-LABEL: v_maximum_v3f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v6, v0, v3
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX10-NEXT: v_max_f32_e32 v7, v1, v4
+; GFX10-NEXT: v_max_f32_e32 v8, v2, v5
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
@@ -1751,17 +1145,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX11-LABEL: v_maximum_v3f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
@@ -1784,67 +1175,48 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX7-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v2, v5
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
+; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32__nnan_nsz:
@@ -1866,292 +1238,111 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX7-LABEL: v_maximum_v4f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v1, v5
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v2, v6
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v3, v7
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v0, v4
; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v1, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v2, v6
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v3, v7
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
+; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v0, v4
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v9, v1, v5
+; GFX10-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v3, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32:
@@ -2174,236 +1365,53 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX7-LABEL: v_maximum_v4f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v0, v4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v1, v5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v2, v6
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v3, v7
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v7, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v7, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
+; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nnan:
@@ -2426,17 +1434,17 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX7-LABEL: v_maximum_v4f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v3, v7
+; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -2444,21 +1452,17 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX8-LABEL: v_maximum_v4f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v0, v4
; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v1, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v2, v6
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v3, v7
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -2466,21 +1470,17 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX9-LABEL: v_maximum_v4f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2488,28 +1488,20 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX940-LABEL: v_maximum_v4f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
+; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
@@ -2518,44 +1510,35 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX10-LABEL: v_maximum_v4f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v0, v4
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
+; GFX10-NEXT: v_max_f32_e32 v9, v1, v5
+; GFX10-NEXT: v_max_f32_e32 v4, v2, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v7, v3, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v4, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v3, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v7, v3, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v4, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nsz:
@@ -2578,79 +1561,53 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX7-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v3, v7
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
+; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nnan_nsz:
@@ -2673,551 +1630,185 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX7-LABEL: v_maximum_v8f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v0, v8
+; GFX7-NEXT: v_max_f32_e32 v16, v0, v8
; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v1, v9
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v1, v9
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v2, v10
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v2, v10
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v3, v11
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v3, v11
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v4, v12
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v4, v12
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v5, v13
+; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v5, v13
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v6, v14
+; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v6, v14
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v7, v15
+; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v7, v15
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v8f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v16, v0, v8
; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v9
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v1, v9
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v10
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v2, v10
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v3, v11
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v4, v12
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v4, v12
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v5, v13
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v5, v13
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v6, v14
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v6, v14
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v7, v15
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v7, v15
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v8f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v16, v0, v8
; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v1, v9
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v2, v10
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v3, v11
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v4, v12
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v5, v13
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v6, v14
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v7, v15
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v8f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v8
+; GFX940-NEXT: v_max_f32_e32 v16, v0, v8
; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v9
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v1, v9
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v10
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v2, v10
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v11
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v3, v11
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v4, v12
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v4, v12
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v5, v13
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v5, v13
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v6, v14
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v6, v14
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v7, v15
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v7, v15
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v16, v0, v8
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v17, v1, v9
+; GFX10-NEXT: v_max_f32_e32 v8, v2, v10
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v17, 0x7fc00000, v17, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v12, v4, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v9, v3, v11
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v10, v7, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v4, v12
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v9, v5, v13
+; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v6, v14
+; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v16, v0, v8 :: v_dual_max_f32 v17, v1, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v17, 0x7fc00000, v17, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v12, v4, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v9, v3, v11 :: v_dual_max_f32 v8, v2, v10
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v10, v7, v15
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_max_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v8f32:
@@ -3244,1071 +1835,371 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-LABEL: v_maximum_v16f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v32, v0, v16
-; GFX7-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v16, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v1, v17
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-NEXT: v_writelane_b32 v31, s30, 0
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v2, v18
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v18, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v3, v19
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v19, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v4, v20
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v20, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v5, v21
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v21, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v6, v22
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v22, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v7, v23
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v23, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v8, v24
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v24, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v9, v25
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v25, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v10, v26
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v26, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v11, v27
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v27, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v12, v28
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v28, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v13, v29
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v29, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v14, v30
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v30, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v17
+; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v18
+; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX7-NEXT: v_max_f32_e32 v18, v13, v29
+; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX7-NEXT: v_writelane_b32 v31, s31, 1
+; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v19
+; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX7-NEXT: v_max_f32_e32 v4, v4, v20
+; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX7-NEXT: v_max_f32_e32 v5, v5, v21
+; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX7-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX7-NEXT: v_max_f32_e32 v7, v7, v23
+; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX7-NEXT: v_max_f32_e32 v8, v8, v24
+; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX7-NEXT: v_max_f32_e32 v9, v9, v25
+; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX7-NEXT: v_max_f32_e32 v10, v10, v26
+; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX7-NEXT: v_max_f32_e32 v11, v11, v27
+; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX7-NEXT: v_max_f32_e32 v12, v12, v28
+; GFX7-NEXT: v_max_f32_e32 v19, v14, v30
+; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX7-NEXT: v_readlane_b32 s31, v31, 1
+; GFX7-NEXT: v_readlane_b32 s30, v31, 0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_max_f32_e32 v18, v15, v16
+; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v15, v17
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v16f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc
-; GFX8-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v16, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX8-NEXT: v_writelane_b32 v31, s30, 0
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v18, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v19
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v19, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v4, v20
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v20, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v5, v21
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v21, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v6, v22
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v22, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v7, v23
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v23, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v8, v24
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v24, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v9, v25
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v25, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v10, v26
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v26, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v11, v27
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v27, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v12, v28
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v28, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v13, v29
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v29, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v14, v30
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v30, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v17
+; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v18
+; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX8-NEXT: v_max_f32_e32 v18, v13, v29
+; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX8-NEXT: v_writelane_b32 v31, s31, 1
+; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX8-NEXT: v_max_f32_e32 v3, v3, v19
+; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX8-NEXT: v_max_f32_e32 v4, v4, v20
+; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX8-NEXT: v_max_f32_e32 v5, v5, v21
+; GFX8-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX8-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX8-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX8-NEXT: v_max_f32_e32 v7, v7, v23
+; GFX8-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX8-NEXT: v_max_f32_e32 v8, v8, v24
+; GFX8-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX8-NEXT: v_max_f32_e32 v9, v9, v25
+; GFX8-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX8-NEXT: v_max_f32_e32 v10, v10, v26
+; GFX8-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX8-NEXT: v_max_f32_e32 v11, v11, v27
+; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX8-NEXT: v_max_f32_e32 v12, v12, v28
+; GFX8-NEXT: v_max_f32_e32 v19, v14, v30
+; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX8-NEXT: v_readlane_b32 s31, v31, 1
+; GFX8-NEXT: v_readlane_b32 s30, v31, 0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_max_f32_e32 v18, v15, v16
+; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v15, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v16f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v16, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX9-NEXT: v_writelane_b32 v31, s30, 0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v18
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v18, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v19
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v19, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v4, v20
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v20, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v5, v21
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v21, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v6, v22
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v22, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v7, v23
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v23, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v8, v24
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v24, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v9, v25
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v25, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v10, v26
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v26, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v11, v27
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v27, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v12, v28
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v28, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v13, v29
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v29, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v30, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v17
+; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v18
+; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX9-NEXT: v_max_f32_e32 v18, v13, v29
+; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX9-NEXT: v_writelane_b32 v31, s31, 1
+; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX9-NEXT: v_max_f32_e32 v3, v3, v19
+; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX9-NEXT: v_max_f32_e32 v4, v4, v20
+; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX9-NEXT: v_max_f32_e32 v5, v5, v21
+; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX9-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX9-NEXT: v_max_f32_e32 v7, v7, v23
+; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX9-NEXT: v_max_f32_e32 v8, v8, v24
+; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX9-NEXT: v_max_f32_e32 v9, v9, v25
+; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX9-NEXT: v_max_f32_e32 v10, v10, v26
+; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX9-NEXT: v_max_f32_e32 v11, v11, v27
+; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX9-NEXT: v_max_f32_e32 v12, v12, v28
+; GFX9-NEXT: v_max_f32_e32 v19, v14, v30
+; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX9-NEXT: v_readlane_b32 s31, v31, 1
+; GFX9-NEXT: v_readlane_b32 s30, v31, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_max_f32_e32 v18, v15, v16
+; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v15, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v16f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v16
; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v16, v0, vcc
+; GFX940-NEXT: v_max_f32_e32 v33, v0, v16
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v32, v33, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v16, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v33
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v17
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v34, v1, v17
+; GFX940-NEXT: v_max_f32_e32 v35, v2, v18
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v18
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v36, v3, v19
+; GFX940-NEXT: v_max_f32_e32 v37, v4, v20
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v18, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v19
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v38, v5, v21
+; GFX940-NEXT: v_max_f32_e32 v39, v6, v22
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v19, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v4, v20
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v48, v7, v23
+; GFX940-NEXT: v_max_f32_e32 v49, v8, v24
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v20, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v5, v21
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
+; GFX940-NEXT: v_max_f32_e32 v50, v9, v25
+; GFX940-NEXT: v_max_f32_e32 v51, v10, v26
+; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v21, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v6, v22
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
+; GFX940-NEXT: v_max_f32_e32 v52, v11, v27
+; GFX940-NEXT: v_max_f32_e32 v53, v12, v28
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v22, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
+; GFX940-NEXT: v_max_f32_e32 v54, v13, v29
+; GFX940-NEXT: v_max_f32_e32 v55, v14, v30
+; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v23, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v8, v24
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v16, v15, v31
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v24, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v9, v25
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v25, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v10, v26
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v26, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v11, v27
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v27, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v12, v28
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v28, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v13, v29
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v29, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v14, v30
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v30, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v15, v31
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v31, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v16
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT: v_cndmask_b32_e32 v34, v18, v2, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT: v_cndmask_b32_e32 v35, v19, v3, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT: v_cndmask_b32_e32 v36, v20, v4, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT: v_cndmask_b32_e32 v37, v21, v5, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT: v_cndmask_b32_e32 v38, v22, v6, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT: v_cndmask_b32_e32 v39, v23, v7, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT: v_cndmask_b32_e32 v48, v24, v8, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT: v_cndmask_b32_e32 v49, v25, v9, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT: v_cndmask_b32_e32 v50, v26, v10, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT: v_cndmask_b32_e32 v51, v27, v11, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT: v_cndmask_b32_e32 v52, v28, v12, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT: v_cndmask_b32_e32 v53, v29, v13, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT: v_cndmask_b32_e32 v54, v30, v14, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v32, v0, v16
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v32, 0x7fc00000, v32, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v33, v1, v17
+; GFX10-NEXT: v_max_f32_e32 v34, v2, v18
+; GFX10-NEXT: v_max_f32_e32 v35, v3, v19
+; GFX10-NEXT: v_max_f32_e32 v36, v4, v20
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v33, 0x7fc00000, v33, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v37, v5, v21
+; GFX10-NEXT: v_max_f32_e32 v38, v6, v22
+; GFX10-NEXT: v_max_f32_e32 v39, v7, v23
+; GFX10-NEXT: v_max_f32_e32 v48, v8, v24
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT: v_cndmask_b32_e32 v34, 0x7fc00000, v34, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v49, v9, v25
+; GFX10-NEXT: v_max_f32_e32 v50, v10, v26
+; GFX10-NEXT: v_max_f32_e32 v51, v11, v27
+; GFX10-NEXT: v_max_f32_e32 v52, v12, v28
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT: v_cndmask_b32_e32 v35, 0x7fc00000, v35, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v53, v13, v29
+; GFX10-NEXT: v_max_f32_e32 v54, v14, v30
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT: v_cndmask_b32_e32 v36, 0x7fc00000, v36, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT: v_cndmask_b32_e32 v37, 0x7fc00000, v37, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT: v_cndmask_b32_e32 v38, 0x7fc00000, v38, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT: v_cndmask_b32_e32 v39, 0x7fc00000, v39, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT: v_cndmask_b32_e32 v48, 0x7fc00000, v48, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT: v_cndmask_b32_e32 v49, 0x7fc00000, v49, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT: v_cndmask_b32_e32 v50, 0x7fc00000, v50, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT: v_cndmask_b32_e32 v51, 0x7fc00000, v51, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT: v_cndmask_b32_e32 v52, 0x7fc00000, v52, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT: v_cndmask_b32_e32 v53, 0x7fc00000, v53, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT: v_cndmask_b32_e32 v54, 0x7fc00000, v54, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v16, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v17, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v18, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v19, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v20, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v21, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v22, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v23, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v24, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v25, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v26, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v27, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v28, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v29, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v30, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v33
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v35
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v36
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v37
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v38
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v39
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v48
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v16, v15, v31
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v50
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v31, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v53
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v54
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v16
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v34, v18, v2, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v35, v19, v3, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT: v_cndmask_b32_e32 v36, v20, v4, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT: v_cndmask_b32_e32 v37, v21, v5, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e32 v38, v22, v6, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT: v_cndmask_b32_e32 v39, v23, v7, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT: v_cndmask_b32_e32 v48, v24, v8, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT: v_cndmask_b32_e32 v49, v25, v9, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT: v_cndmask_b32_e32 v50, v26, v10, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v51, v27, v11, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT: v_cndmask_b32_e32 v52, v28, v12, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT: v_cndmask_b32_e32 v53, v29, v13, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT: v_cndmask_b32_e32 v54, v30, v14, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v32, v0, v16 :: v_dual_max_f32 v33, v1, v17
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v32, 0x7fc00000, v32, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v34, v2, v18 :: v_dual_max_f32 v35, v3, v19
+; GFX11-NEXT: v_dual_max_f32 v36, v4, v20 :: v_dual_max_f32 v37, v5, v21
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v33, 0x7fc00000, v33, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v54, v14, v30
+; GFX11-NEXT: v_dual_max_f32 v38, v6, v22 :: v_dual_max_f32 v39, v7, v23
+; GFX11-NEXT: v_dual_max_f32 v48, v8, v24 :: v_dual_max_f32 v49, v9, v25
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v34, 0x7fc00000, v34, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v50, v10, v26 :: v_dual_max_f32 v51, v11, v27
+; GFX11-NEXT: v_dual_max_f32 v52, v12, v28 :: v_dual_max_f32 v53, v13, v29
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v35, 0x7fc00000, v35, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT: v_cndmask_b32_e32 v36, 0x7fc00000, v36, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT: v_cndmask_b32_e32 v37, 0x7fc00000, v37, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e32 v38, 0x7fc00000, v38, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT: v_cndmask_b32_e32 v39, 0x7fc00000, v39, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT: v_cndmask_b32_e32 v48, 0x7fc00000, v48, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT: v_cndmask_b32_e32 v49, 0x7fc00000, v49, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT: v_cndmask_b32_e32 v50, 0x7fc00000, v50, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v51, 0x7fc00000, v51, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT: v_cndmask_b32_e32 v52, 0x7fc00000, v52, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT: v_cndmask_b32_e32 v53, 0x7fc00000, v53, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT: v_cndmask_b32_e32 v54, 0x7fc00000, v54, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v16, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v17, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v18, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v19, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v20, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v21, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v22, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v23, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v24, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v25, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v26, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v27, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v28, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v29, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v30, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v33
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v35
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v36
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v37
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v38
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v39
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v48
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v16, v15, v31
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v50
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v31, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v53
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v54
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v16f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index d60a28e..78fb231 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -530,221 +530,86 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX7-LABEL: v_maximum_v2f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX7-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX8-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[4:5], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v10, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v6, v2, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v10, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v12, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v13, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s7, 0, v[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s8, 0, v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s8
+; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[6:7]
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v6, v2, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v8, s1
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v10, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v13, s2
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[6:7], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[10:11]
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v12, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s3, 0, v[8:9]
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, v0, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, v1, s3
+; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64:
@@ -765,182 +630,43 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX7-LABEL: v_maximum_v2f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[4:5], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 64
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[0:1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[2:3]
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v6, v2, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s7, 0, v[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s8, 0, v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s8
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[6:7], 64
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v5, v1 :: v_dual_cndmask_b32 v8, v4, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v6, v2, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s3, 0, v[8:9]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, v0, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, v1, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nnan:
@@ -961,111 +687,86 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX7-LABEL: v_maximum_v2f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v6, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s6
+; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v8, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v8, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v9, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nsz:
@@ -1086,69 +787,43 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX7-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nnan_nsz:
@@ -1170,61 +845,20 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s10
+; GFX7-NEXT: v_mov_b32_e32 v4, s8
; GFX7-NEXT: v_mov_b32_e32 v1, s11
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 64
-; GFX7-NEXT: v_mov_b32_e32 v0, s8
-; GFX7-NEXT: v_mov_b32_e32 v1, s9
-; GFX7-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s16, s7, s11
-; GFX7-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX7-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s14, s6, s10
-; GFX7-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 64
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s14, s14, 0
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX7-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s7, s7, s15
-; GFX7-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX7-NEXT: s_cselect_b32 s7, s11, s7
-; GFX7-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX7-NEXT: s_cselect_b32 s7, s7, s15
-; GFX7-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX7-NEXT: s_cselect_b32 s6, s6, s14
-; GFX7-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX7-NEXT: s_cselect_b32 s6, s10, s6
-; GFX7-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX7-NEXT: s_cselect_b32 s6, s6, s14
-; GFX7-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s14, s5, s9
-; GFX7-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX7-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s10, s4, s8
-; GFX7-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 64
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 64
-; GFX7-NEXT: s_cselect_b32 s10, s10, 0
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX7-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX7-NEXT: s_cselect_b32 s5, s5, s11
-; GFX7-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s5, s9, s5
-; GFX7-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s5, s5, s11
-; GFX7-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX7-NEXT: s_cselect_b32 s4, s4, s10
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s4, s8, s4
-; GFX7-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s4, s4, s10
+; GFX7-NEXT: v_mov_b32_e32 v5, s9
+; GFX7-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX7-NEXT: v_max_f64 v[0:1], s[4:5], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX7-NEXT: ;;#ASMSTART
-; GFX7-NEXT: ; use s[4:7]
+; GFX7-NEXT: ; use v[0:3]
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -1232,61 +866,20 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s10
+; GFX8-NEXT: v_mov_b32_e32 v4, s8
; GFX8-NEXT: v_mov_b32_e32 v1, s11
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 64
-; GFX8-NEXT: v_mov_b32_e32 v0, s8
-; GFX8-NEXT: v_mov_b32_e32 v1, s9
-; GFX8-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s16, s7, s11
-; GFX8-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX8-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s14, s6, s10
-; GFX8-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 64
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s14, s14, 0
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s7, s7, s15
-; GFX8-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX8-NEXT: s_cselect_b32 s7, s11, s7
-; GFX8-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX8-NEXT: s_cselect_b32 s7, s7, s15
-; GFX8-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX8-NEXT: s_cselect_b32 s6, s6, s14
-; GFX8-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX8-NEXT: s_cselect_b32 s6, s10, s6
-; GFX8-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX8-NEXT: s_cselect_b32 s6, s6, s14
-; GFX8-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s14, s5, s9
-; GFX8-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX8-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s10, s4, s8
-; GFX8-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 64
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 64
-; GFX8-NEXT: s_cselect_b32 s10, s10, 0
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX8-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX8-NEXT: s_cselect_b32 s5, s5, s11
-; GFX8-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s5, s9, s5
-; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s5, s5, s11
-; GFX8-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s4, s8, s4
-; GFX8-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
+; GFX8-NEXT: v_mov_b32_e32 v5, s9
+; GFX8-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX8-NEXT: v_max_f64 v[0:1], s[4:5], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s[4:7]
+; GFX8-NEXT: ; use v[0:3]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -1294,61 +887,20 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s10
+; GFX9-NEXT: v_mov_b32_e32 v4, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s11
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 64
-; GFX9-NEXT: v_mov_b32_e32 v0, s8
-; GFX9-NEXT: v_mov_b32_e32 v1, s9
-; GFX9-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s16, s7, s11
-; GFX9-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX9-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s14, s6, s10
-; GFX9-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 64
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s14, s14, 0
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX9-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s7, s7, s15
-; GFX9-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX9-NEXT: s_cselect_b32 s7, s11, s7
-; GFX9-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX9-NEXT: s_cselect_b32 s7, s7, s15
-; GFX9-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX9-NEXT: s_cselect_b32 s6, s6, s14
-; GFX9-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX9-NEXT: s_cselect_b32 s6, s10, s6
-; GFX9-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX9-NEXT: s_cselect_b32 s6, s6, s14
-; GFX9-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s14, s5, s9
-; GFX9-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX9-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s10, s4, s8
-; GFX9-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 64
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 64
-; GFX9-NEXT: s_cselect_b32 s10, s10, 0
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX9-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX9-NEXT: s_cselect_b32 s5, s5, s11
-; GFX9-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s5, s9, s5
-; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s5, s5, s11
-; GFX9-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX9-NEXT: s_cselect_b32 s4, s4, s10
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s4, s8, s4
-; GFX9-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s4, s4, s10
+; GFX9-NEXT: v_mov_b32_e32 v5, s9
+; GFX9-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX9-NEXT: v_max_f64 v[0:1], s[4:5], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[4:7]
+; GFX9-NEXT: ; use v[0:3]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -1356,179 +908,52 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, s[2:3], v[0:1]
-; GFX940-NEXT: s_and_b64 s[8:9], vcc, exec
-; GFX940-NEXT: v_cmp_o_f64_e64 s[8:9], s[2:3], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s12, s3, s7
-; GFX940-NEXT: s_and_b64 s[10:11], s[8:9], exec
-; GFX940-NEXT: s_cselect_b32 s11, s12, 0x7ff80000
-; GFX940-NEXT: s_and_b64 s[12:13], vcc, exec
-; GFX940-NEXT: s_cselect_b32 s10, s2, s6
-; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[12:13], s[2:3], 64
-; GFX940-NEXT: s_cselect_b32 s10, s10, 0
-; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[14:15], s[6:7], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[8:9], s[10:11], 0
-; GFX940-NEXT: s_cselect_b32 s3, s3, s11
-; GFX940-NEXT: s_and_b64 s[16:17], s[14:15], exec
-; GFX940-NEXT: s_cselect_b32 s3, s7, s3
-; GFX940-NEXT: s_and_b64 s[16:17], s[8:9], exec
-; GFX940-NEXT: s_cselect_b32 s7, s3, s11
-; GFX940-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s11, s2, s10
-; GFX940-NEXT: s_and_b64 s[2:3], s[14:15], exec
+; GFX940-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1]
+; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; GFX940-NEXT: s_cselect_b32 s6, s6, s11
-; GFX940-NEXT: s_and_b64 s[2:3], s[8:9], exec
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s6, s6, s10
-; GFX940-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX940-NEXT: v_cmp_o_f64_e64 s[2:3], s[0:1], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s10, s1, s5
-; GFX940-NEXT: s_and_b64 s[8:9], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s9, s10, 0x7ff80000
-; GFX940-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX940-NEXT: s_cselect_b32 s8, s0, s4
-; GFX940-NEXT: s_and_b64 s[2:3], s[2:3], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[10:11], s[0:1], 64
-; GFX940-NEXT: s_cselect_b32 s8, s8, 0
-; GFX940-NEXT: s_and_b64 s[12:13], s[10:11], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[12:13], s[4:5], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], s[8:9], 0
-; GFX940-NEXT: s_cselect_b32 s1, s1, s9
-; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s1, s5, s1
-; GFX940-NEXT: s_and_b64 s[14:15], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s5, s1, s9
-; GFX940-NEXT: s_and_b64 s[10:11], s[10:11], exec
-; GFX940-NEXT: s_cselect_b32 s9, s0, s8
-; GFX940-NEXT: s_and_b64 s[0:1], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s4, s4, s9
-; GFX940-NEXT: s_and_b64 s[0:1], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s4, s4, s8
+; GFX940-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1]
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use s[4:7]
+; GFX940-NEXT: ; use v[0:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s12, s[6:7], s[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s14, s[6:7], s[10:11]
-; GFX10-NEXT: v_cmp_class_f64_e64 s15, s[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s16, s[10:11], 64
-; GFX10-NEXT: v_cmp_o_f64_e64 s18, s[4:5], s[8:9]
-; GFX10-NEXT: v_cmp_class_f64_e64 s19, s[4:5], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s20, s[8:9], 64
-; GFX10-NEXT: s_and_b32 s13, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s13, s7, s11
-; GFX10-NEXT: s_and_b32 s17, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s13, s13, 0x7ff80000
-; GFX10-NEXT: s_and_b32 s12, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s12, s6, s10
-; GFX10-NEXT: s_and_b32 s14, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s12, s12, 0
-; GFX10-NEXT: v_cmp_gt_f64_e64 s17, s[4:5], s[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, s[12:13], 0
-; GFX10-NEXT: s_and_b32 s21, s15, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s7, s13
-; GFX10-NEXT: s_and_b32 s21, s16, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s11, s7
-; GFX10-NEXT: s_and_b32 s11, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s7, s13
-; GFX10-NEXT: s_and_b32 s11, s15, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s6, s12
-; GFX10-NEXT: s_and_b32 s11, s16, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s10, s6
-; GFX10-NEXT: s_and_b32 s10, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s6, s12
-; GFX10-NEXT: s_and_b32 s10, s17, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s5, s9
-; GFX10-NEXT: s_and_b32 s11, s18, exec_lo
-; GFX10-NEXT: s_cselect_b32 s11, s10, 0x7ff80000
-; GFX10-NEXT: s_and_b32 s10, s17, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s4, s8
-; GFX10-NEXT: s_and_b32 s12, s18, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s10, 0
-; GFX10-NEXT: s_and_b32 s13, s19, exec_lo
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, s[10:11], 0
-; GFX10-NEXT: s_cselect_b32 s5, s5, s11
-; GFX10-NEXT: s_and_b32 s13, s20, exec_lo
-; GFX10-NEXT: s_cselect_b32 s5, s9, s5
-; GFX10-NEXT: s_and_b32 s9, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s5, s5, s11
-; GFX10-NEXT: s_and_b32 s9, s19, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s4, s10
-; GFX10-NEXT: s_and_b32 s9, s20, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s8, s4
-; GFX10-NEXT: s_and_b32 s8, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s4, s10
+; GFX10-NEXT: v_max_f64 v[0:1], s[6:7], s[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, s[6:7], s[10:11]
+; GFX10-NEXT: v_max_f64 v[4:5], s[4:5], s[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, s[4:5], s[8:9]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v0, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, s4
; GFX10-NEXT: ;;#ASMSTART
-; GFX10-NEXT: ; use s[4:7]
+; GFX10-NEXT: ; use v[0:3]
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_maximum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s8, s[2:3], s[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s10, s[2:3], s[6:7]
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, s[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s12, s[6:7], 64
-; GFX11-NEXT: v_cmp_o_f64_e64 s14, s[0:1], s[4:5]
-; GFX11-NEXT: v_cmp_class_f64_e64 s15, s[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, s[4:5], 64
-; GFX11-NEXT: s_and_b32 s9, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s9, s3, s7
-; GFX11-NEXT: s_and_b32 s13, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s9, s9, 0x7ff80000
-; GFX11-NEXT: s_and_b32 s8, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s8, s2, s6
-; GFX11-NEXT: s_and_b32 s10, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s8, s8, 0
-; GFX11-NEXT: v_cmp_gt_f64_e64 s13, s[0:1], s[4:5]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, s[8:9], 0
-; GFX11-NEXT: s_and_b32 s17, s11, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s3, s9
-; GFX11-NEXT: s_and_b32 s17, s12, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_and_b32 s7, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s3, s9
-; GFX11-NEXT: s_and_b32 s7, s11, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s2, s8
-; GFX11-NEXT: s_and_b32 s7, s12, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s6, s2
-; GFX11-NEXT: s_and_b32 s6, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s2, s8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: s_and_b32 s6, s13, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s1, s5
-; GFX11-NEXT: s_and_b32 s7, s14, exec_lo
-; GFX11-NEXT: s_cselect_b32 s7, s6, 0x7ff80000
-; GFX11-NEXT: s_and_b32 s6, s13, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s0, s4
-; GFX11-NEXT: s_and_b32 s8, s14, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s6, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: s_and_b32 s9, s15, exec_lo
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, s[6:7], 0
-; GFX11-NEXT: s_cselect_b32 s1, s1, s7
-; GFX11-NEXT: s_and_b32 s9, s16, exec_lo
-; GFX11-NEXT: s_cselect_b32 s1, s5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_and_b32 s5, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s1, s1, s7
-; GFX11-NEXT: s_and_b32 s5, s15, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s0, s6
-; GFX11-NEXT: s_and_b32 s5, s16, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s4, s0
-; GFX11-NEXT: s_and_b32 s4, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s0, s6
+; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[6:7]
+; GFX11-NEXT: v_max_f64 v[4:5], s[0:1], s[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s[0:3]
+; GFX11-NEXT: ; use v[0:3]
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
@@ -1554,306 +979,110 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX7-LABEL: v_maximum_v3f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX7-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 64
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX8-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 64
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 64
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v11, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[2:3]
+; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v10, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v10, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v12, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v14, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, v17, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, v18, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v19, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[8:9], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[12:13]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[14:15]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v8, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s11
+; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v12, v7, v1 :: v_dual_cndmask_b32 v17, v6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v10, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v14, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v12, 0, v17, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v14, 0, v18, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v19, s4
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[10:11], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[12:13]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s6, 0, v[14:15]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v12, v0 :: v_dual_cndmask_b32 v1, v13, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v8, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v9, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, v0, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, v1, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s7
+; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64:
@@ -1875,247 +1104,49 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX7-LABEL: v_maximum_v3f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[12:13]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 64
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1]
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v11, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v10, v4, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[2:3]
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[8:9], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v10, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[12:13]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[14:15]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v8, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s11
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[10:11], 64
-; GFX11-NEXT: v_dual_cndmask_b32 v13, v7, v1 :: v_dual_cndmask_b32 v12, v6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v10, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[12:13]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s6, 0, v[14:15]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v12, v0 :: v_dual_cndmask_b32 v1, v13, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v8, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v9, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, v0, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, v1, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s7
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nnan:
@@ -2137,144 +1168,110 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX7-LABEL: v_maximum_v3f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v10, v4, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc
+; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v6, v5, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v12, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s8
+; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v12, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s4
+; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nsz:
@@ -2296,88 +1293,49 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX7-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v5, s5
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v5, s1
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nnan_nsz:
@@ -2399,404 +1357,135 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX7-LABEL: v_maximum_v4f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX7-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX8-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX9-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[2:3]
+; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[12:13], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[14:15], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s6, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s8, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[14:15], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v19, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v13, v5, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v15, v7, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v19, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v18, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v12, v4, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, v21, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v14, v6, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[4:5], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v20, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, v23, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v22, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, v24, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s9, v[10:11], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s6, 0, v[16:17]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[18:19]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[20:21]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s14
+; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s3, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v11, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v13, v5, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v15, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v18, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v20, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v10, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v12, v4, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v14, v6, s3
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v22, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v18, 0, v18, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v20, 0, v20, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v22, 0, v24, s6
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[12:13], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[14:15], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[10:11], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, 0, v[18:19]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[20:21]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, 0, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v16, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s10
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, v0, s7
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, v1, s7
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s8
+; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64:
@@ -2819,320 +1508,55 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX7-LABEL: v_maximum_v4f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 64
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v13, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[12:13], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v15, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[14:15], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[2:3]
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[10:11], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[8:9], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[14:15], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v15, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v12, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[4:5], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v14, v6, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[2:3], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[16:17]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[18:19]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[20:21]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v21, v5, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s14
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[14:15], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[10:11], 64
-; GFX11-NEXT: v_dual_cndmask_b32 v17, v9, v1 :: v_dual_cndmask_b32 v16, v8, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v13, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v12, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v23, v15, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v14, v6, s2
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[12:13], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, 0, v[18:19]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[20:21]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, 0, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s4
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v16, v0 :: v_dual_cndmask_b32 v1, v17, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, v0, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, v1, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s10
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nnan:
@@ -3155,180 +1579,135 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX7-LABEL: v_maximum_v4f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v4, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
+; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[6:7], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v12, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v14, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v16, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v10, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v7, s10
+; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s3, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
-; GFX11-NEXT: v_dual_cndmask_b32 v16, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v12, v4, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v14, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v16, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v10, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v12, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v7, s6
+; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nsz:
@@ -3351,108 +1730,55 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX7-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v12, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v14, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s6
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v12, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v14, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, v7, s2
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nnan_nsz:
@@ -3475,782 +1801,244 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX7-LABEL: v_maximum_v8f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 64
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX7-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX7-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX7-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX7-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX7-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX7-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX7-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX7-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v8f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 64
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX8-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX8-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX8-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX8-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX8-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX8-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX8-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX8-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v8f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 64
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX9-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX9-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX9-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX9-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX9-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX9-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX9-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v8f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX940-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v35, v32, v33, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v34, 0, v33, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[16:17], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v34, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v35, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[18:19]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v34, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v35, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[18:19], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v16, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[20:21], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[22:23]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[22:23], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v16, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[24:25], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[10:11], v[26:27]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[26:27], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v16, v10, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[28:29], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
+; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
+; GFX940-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX940-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
+; GFX940-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX940-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
+; GFX940-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX940-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
+; GFX940-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[30:31], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v15, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, v14, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v17, v15, s[2:3]
+; GFX940-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s9, v[6:7], v[22:23]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s10, v[8:9], v[24:25]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s11, v[10:11], v[26:27]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s12, v[12:13], v[28:29]
-; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[6:7], v[22:23]
-; GFX10-NEXT: v_cmp_o_f64_e64 s14, v[8:9], v[24:25]
-; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[10:11], v[26:27]
-; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[12:13], v[28:29]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[2:3], v[18:19]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[18:19]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[4:5], v[20:21]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[20:21]
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[26:27], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s18, v[28:29], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v17, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v23, v7, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v25, v9, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v27, v11, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v33, 0x7ff80000, v32, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v52, v29, v13, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v38, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v48, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v32, 0, v32, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v50, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v52, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v22, v6, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v24, v8, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v26, v10, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v52, v28, v12, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[16:17], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[18:19], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v19, v3, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v38, 0, v38, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v36, v21, v5, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 s9, v[12:13], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v48, 0, v48, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v35, 0x7ff80000, v34, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v18, v2, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v37, 0x7ff80000, v36, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v36, v20, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v34, 0, v34, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v36, 0, v36, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[8:9], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v34, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v50, 0, v50, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v52, 0, v52, s16
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[20:21], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v18, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s15, v[22:23], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s16, v[24:25], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s19, 0, v[32:33]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s20, 0, v[34:35]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s21, 0, v[36:37]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s22, 0, v[48:49]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s23, 0, v[50:51]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s24, 0, v[52:53]
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v52, v12, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v36, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v35, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v38, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v37, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, v8, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v50, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v39, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, v9, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v51, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v53, v13, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v20, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v26, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v22, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v24, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v28, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v19, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v21, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v23, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v25, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v27, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v29, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, v0, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v34, v2, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v36, v4, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, v8, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v50, v10, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v52, v12, s24
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, v1, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v35, v3, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v37, v5, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, v9, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v51, v11, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v53, v13, s24
+; GFX10-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
+; GFX10-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
+; GFX10-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
+; GFX10-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
+; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
+; GFX10-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
+; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
+; GFX10-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
+; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v16, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v20, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v22, 0, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v24, 0, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v12, v26, 0, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s10, v[14:15], v[30:31]
-; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[14:15], v[30:31]
-; GFX10-NEXT: v_cmp_class_f64_e64 s25, v[30:31], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v31, v15, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v30, v14, s10
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v16, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v54, 0, v18, s13
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[14:15], 64
-; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[54:55]
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v38, v6, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v39, v7, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v54, v14, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v55, v15, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v30, s25
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v31, s25
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v55, v15, vcc_lo
+; GFX10-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31]
+; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
+; GFX10-NEXT: v_cndmask_b32_e64 v14, v28, 0, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v8f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_cmp_gt_f64_e64 s4, v[6:7], v[22:23]
-; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[6:7], v[22:23]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[2:3], v[18:19]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s6, v[10:11], v[26:27]
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[18:19]
-; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[10:11], v[26:27]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s3, v[4:5], v[20:21]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s5, v[8:9], v[24:25]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s7, v[12:13], v[28:29]
-; GFX11-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[20:21]
-; GFX11-NEXT: v_cmp_o_f64_e64 s10, v[8:9], v[24:25]
-; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[12:13], v[28:29]
-; GFX11-NEXT: v_cmp_class_f64_e64 s13, v[18:19], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s15, v[20:21], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v38, v23, v7, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v34, v19, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v50, v27, v11, s6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v38, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v38, v22, v6, s4
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[6:7], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v35, 0x7ff80000, v34, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v50, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v34, v18, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v50, v26, v10, s6
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[0:1], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v36, v21, v5, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v48, v25, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v52, v29, v13, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v50, 0, v50, s11
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, v[16:17], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v37, 0x7ff80000, v36, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v48, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v52, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v36, v20, v4, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v48, v24, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v52, v28, v12, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v34, 0, v34, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v38, 0, v38, s9
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[4:5], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s7, v[10:11], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s9, v[12:13], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v36, 0, v36, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v48, 0, v48, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v52, 0, v52, s12
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[24:25], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s8, v[26:27], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[28:29], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s14, 0, v[34:35]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s16, 0, v[36:37]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s17, 0, v[38:39]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s18, 0, v[48:49]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s20, 0, v[50:51]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s21, 0, v[52:53]
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v39, v7, s4
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v17, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v38, v6, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v33, 0x7ff80000, v32, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, v1, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v32, 0, v32, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s11
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, v0, s1
-; GFX11-NEXT: v_cmp_eq_f64_e64 s12, 0, v[32:33]
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v34, v2, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v36, v4, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v48, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v50, v10, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v52, v12, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v35, v3, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v37, v5, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v49, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v51, v11, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v53, v13, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v18, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v20, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v24, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v26, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v28, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v19, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v21, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v25, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v27, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v29, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v34, v2, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v36, v4, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v48, v8, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v50, v10, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v52, v12, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v35, v3, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v37, v5, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v49, v9, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v51, v11, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v53, v13, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, v0, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, v1, s12
+; GFX11-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
+; GFX11-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
+; GFX11-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
+; GFX11-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
+; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
+; GFX11-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
+; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
+; GFX11-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
+; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v16, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v18, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v20, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v22, 0, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v10, v24, 0, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v26, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[14:15], v[30:31]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[14:15], v[30:31]
-; GFX11-NEXT: v_cmp_class_f64_e64 s19, v[30:31], 64
-; GFX11-NEXT: v_cndmask_b32_e32 v54, v31, v15, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v54, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v54, 0, v16, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[14:15], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s22, 0, v[54:55]
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v23 :: v_dual_cndmask_b32 v6, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v54, v14, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v55, v15, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v38, v6, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v39, v7, s17
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v30, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v31, s19
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v54, v14, s22
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v55, v15, s22
+; GFX11-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31]
+; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v14, v28, 0, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v8f64:
@@ -4279,1799 +2067,798 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-LABEL: v_maximum_v16f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX7-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX7-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX7-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX7-NEXT: v_writelane_b32 v34, s30, 0
+; GFX7-NEXT: v_writelane_b32 v34, s31, 1
+; GFX7-NEXT: v_writelane_b32 v34, s34, 2
+; GFX7-NEXT: v_writelane_b32 v34, s35, 3
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX7-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX7-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX7-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX7-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX7-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX7-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 64
-; GFX7-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX7-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX7-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX7-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX7-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX7-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX7-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX7-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX7-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX7-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX7-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 64
-; GFX7-NEXT: s_waitcnt vmcnt(7)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX7-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(5)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX7-NEXT: s_waitcnt vmcnt(3)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX7-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX7-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
+; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX7-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX7-NEXT: v_readlane_b32 s35, v34, 3
+; GFX7-NEXT: v_readlane_b32 s34, v34, 2
+; GFX7-NEXT: v_readlane_b32 s31, v34, 1
+; GFX7-NEXT: v_readlane_b32 s30, v34, 0
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v16f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX8-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX8-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX8-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX8-NEXT: s_waitcnt vmcnt(2)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX8-NEXT: v_writelane_b32 v34, s30, 0
+; GFX8-NEXT: v_writelane_b32 v34, s31, 1
+; GFX8-NEXT: v_writelane_b32 v34, s34, 2
+; GFX8-NEXT: v_writelane_b32 v34, s35, 3
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX8-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX8-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX8-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX8-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX8-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX8-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 64
-; GFX8-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX8-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX8-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX8-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX8-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX8-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX8-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX8-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 64
-; GFX8-NEXT: s_waitcnt vmcnt(7)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX8-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(5)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX8-NEXT: s_waitcnt vmcnt(3)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX8-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
+; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX8-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX8-NEXT: v_readlane_b32 s35, v34, 3
+; GFX8-NEXT: v_readlane_b32 s34, v34, 2
+; GFX8-NEXT: v_readlane_b32 s31, v34, 1
+; GFX8-NEXT: v_readlane_b32 s30, v34, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v16f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX9-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(2)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX9-NEXT: v_writelane_b32 v34, s30, 0
+; GFX9-NEXT: v_writelane_b32 v34, s31, 1
+; GFX9-NEXT: v_writelane_b32 v34, s34, 2
+; GFX9-NEXT: v_writelane_b32 v34, s35, 3
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX9-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX9-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX9-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 64
-; GFX9-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX9-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX9-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX9-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX9-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX9-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX9-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 64
-; GFX9-NEXT: s_waitcnt vmcnt(7)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX9-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(5)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX9-NEXT: s_waitcnt vmcnt(3)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX9-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX9-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
+; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX9-NEXT: v_readlane_b32 s35, v34, 3
+; GFX9-NEXT: v_readlane_b32 s34, v34, 2
+; GFX9-NEXT: v_readlane_b32 s31, v34, 1
+; GFX9-NEXT: v_readlane_b32 s30, v34, 0
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v16f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:8
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:4
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:16
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:12
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:24
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:20
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:32
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:28
+; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
+; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
+; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
+; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
+; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
+; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
+; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
+; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
+; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
+; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
+; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
+; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
+; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
+; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
+; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
+; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
+; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
+; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
+; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
+; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
+; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
+; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:128
-; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:124
-; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:120
-; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:116
-; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:40
-; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:36
+; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
+; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
+; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_max_f64 v[58:59], v[2:3], v[36:37]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:104
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:100
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:96
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:92
-; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:56
-; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:52
-; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:48
-; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:44
-; GFX940-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX940-NEXT: v_mov_b32_e32 v56, 0x7ff80000
-; GFX940-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_max_f64 v[60:61], v[4:5], v[38:39]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
+; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
+; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_max_f64 v[62:63], v[6:7], v[48:49]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
+; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
+; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_max_f64 v[2:3], v[0:1], v[56:57]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
; GFX940-NEXT: s_waitcnt vmcnt(23)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[40:41]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v41, v1, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[40:41]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v40, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v57, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[40:41], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v58, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v59, v1, vcc
+; GFX940-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
+; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
+; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
+; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v41, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v51, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e32 v40, v50, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cndmask_b32_e64 v61, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v60, 0, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v60, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v61, v3, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[50:51], 64
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v50, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v51, vcc
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[44:45]
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v58, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v59, v1, vcc
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[60:61]
+; GFX940-NEXT: v_max_f64 v[46:47], v[10:11], v[44:45]
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
+; GFX940-NEXT: s_waitcnt vmcnt(19)
+; GFX940-NEXT: v_max_f64 v[44:45], v[12:13], v[42:43]
+; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
+; GFX940-NEXT: s_waitcnt vmcnt(17)
+; GFX940-NEXT: v_max_f64 v[42:43], v[14:15], v[40:41]
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX940-NEXT: s_waitcnt vmcnt(15)
+; GFX940-NEXT: v_max_f64 v[40:41], v[16:17], v[54:55]
+; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX940-NEXT: s_waitcnt vmcnt(13)
+; GFX940-NEXT: v_max_f64 v[54:55], v[18:19], v[52:53]
+; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX940-NEXT: s_waitcnt vmcnt(11)
+; GFX940-NEXT: v_max_f64 v[52:53], v[20:21], v[50:51]
+; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX940-NEXT: s_waitcnt vmcnt(9)
+; GFX940-NEXT: v_max_f64 v[50:51], v[22:23], v[34:35]
+; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
+; GFX940-NEXT: s_waitcnt vmcnt(6)
+; GFX940-NEXT: v_max_f64 v[34:35], v[24:25], v[32:33]
+; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
+; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(4)
+; GFX940-NEXT: v_max_f64 v[32:33], v[26:27], v[36:37]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v60, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v61, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[44:45]
-; GFX940-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v45, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v44, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[44:45], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v58, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v59, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v44, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v45, s[0:1]
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:72
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:68
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(22)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v58, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v59, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[46:47]
+; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
+; GFX940-NEXT: s_waitcnt vmcnt(2)
+; GFX940-NEXT: v_max_f64 v[32:33], v[28:29], v[38:39]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v47, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v46, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[46:47], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v58, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v59, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v46, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v47, s[0:1]
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:80
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:76
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(18)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[8:9], v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v58, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v59, v7, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[8:9], v[42:43]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v43, v9, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v42, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v57, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[42:43], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[58:59]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v58, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v59, v9, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(8)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v8, v42, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v9, v43, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v42, v55, v11, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[10:11], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v58, v8, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v59, v9, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v43, v56, v42, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v42, v54, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v42, 0, v42, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[54:55], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v42, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v43, v11, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[12:13], v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v10, v54, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v11, v55, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v54, v53, v13, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[12:13], v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v42, v10, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v43, v11, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v55, v56, v54, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v54, v52, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v54, 0, v54, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[52:53], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v54, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v55, v13, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(6)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v12, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v13, v53, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v41, v15, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v54, v12, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v55, v13, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v40, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[40:41], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v52, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v14, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v53, v15, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(3)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[16:17], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v52, v14, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v15, v41, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v45, v17, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[16:17], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v53, v15, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v44, v16, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[44:45], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v52, v16, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v16, v44, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v53, v17, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(1)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[18:19], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v52, v16, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v17, v45, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v47, v19, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[18:19], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v53, v17, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v46, v18, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[46:47], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v18, v52, v18, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v18, v46, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v19, v53, v19, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[20:21], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v52, v18, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v19, v19, v47, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v51, v21, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[20:21], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v19, v53, v19, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v50, v20, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[20:21], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[50:51], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v20, v52, v20, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v21, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[22:23], v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v20, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v21, v21, v51, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v49, v23, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[22:23], v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, v20, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v21, v53, v21, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v51, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v48, v22, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v50, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[48:49], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e32 v22, v50, v22, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v23, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[24:25], v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v22, v48, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v23, v23, v49, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v48, v39, v25, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[24:25], v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, v22, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v23, v51, v23, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v49, v56, v48, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v48, v38, v24, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[38:39], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e32 v24, v48, v24, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v25, v49, v25, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[26:27], v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v24, v38, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v25, v25, v39, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v38, v37, v27, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[26:27], v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v48, v24, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v25, v49, v25, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v39, v56, v38, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v38, v36, v26, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v38, 0, v38, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[36:37], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e32 v26, v38, v26, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v27, v39, v27, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[28:29], v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v26, v36, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v27, v27, v37, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v36, v35, v29, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[28:29], v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v38, v26, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v27, v39, v27, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v37, v56, v36, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v36, v34, v28, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[28:29], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[34:35], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e32 v28, v36, v28, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v29, v37, v29, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v28, v34, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v29, v29, v35, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v34, v33, v31, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[30:31], v[32:33]
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v36, v28, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v29, v37, v29, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v35, v56, v34, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v34, v32, v30, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[32:33], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e32 v30, v34, v30, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v31, v35, v31, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v34, v30, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v31, v35, v31, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX940-NEXT: v_max_f64 v[32:33], v[30:31], v[48:49]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
+; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x20
+; GFX10-NEXT: s_clause 0x19
+; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24
+; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
+; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:32
+; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:28
+; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
+; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:68
+; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:64
+; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:60
+; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:48
+; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:44
+; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
; GFX10-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:8
; GFX10-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:4
-; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:16
-; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:12
-; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:24
-; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:20
-; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:32
-; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:28
-; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:36
-; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:40
-; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:56
-; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:52
-; GFX10-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
-; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
-; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
-; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
-; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:68
-; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:72
-; GFX10-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:80
-; GFX10-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:76
-; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:88
-; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:84
+; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:100
; GFX10-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:96
; GFX10-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:92
-; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:100
-; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:104
-; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:112
-; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:108
-; GFX10-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:120
-; GFX10-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:116
+; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:88
+; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:84
+; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:80
+; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:76
+; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:72
+; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:104
+; GFX10-NEXT: s_waitcnt vmcnt(24)
+; GFX10-NEXT: v_max_f64 v[82:83], v[2:3], v[31:32]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[2:3], v[31:32]
+; GFX10-NEXT: s_waitcnt vmcnt(22)
+; GFX10-NEXT: v_max_f64 v[84:85], v[4:5], v[33:34]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[4:5], v[33:34]
+; GFX10-NEXT: s_clause 0x3
+; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120
+; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:116
+; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:112
+; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:108
+; GFX10-NEXT: s_waitcnt vmcnt(24)
+; GFX10-NEXT: v_max_f64 v[32:33], v[6:7], v[35:36]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[6:7], v[35:36]
+; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:128
-; GFX10-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:124
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[2:3], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[10:11], 64
-; GFX10-NEXT: s_waitcnt vmcnt(31)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[64:65]
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[0:1], v[64:65]
-; GFX10-NEXT: s_waitcnt vmcnt(29)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[2:3], v[54:55]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[54:55]
-; GFX10-NEXT: s_waitcnt vmcnt(27)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[4:5], v[52:53]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[52:53]
-; GFX10-NEXT: s_waitcnt vmcnt(25)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s9, v[6:7], v[50:51]
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[6:7], v[50:51]
+; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:128
+; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:124
; GFX10-NEXT: s_waitcnt vmcnt(23)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s13, v[8:9], v[48:49]
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[64:65], 64
+; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[50:51]
; GFX10-NEXT: s_waitcnt vmcnt(21)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s15, v[12:13], v[36:37]
-; GFX10-NEXT: s_waitcnt vmcnt(17)
-; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[14:15], v[34:35]
-; GFX10-NEXT: v_cndmask_b32_e32 v96, v64, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v97, v54, v2, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v99, v55, v3, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v100, v52, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v96, 0, v96, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v101, v50, v6, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v98, 0, v97, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v97, v65, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[54:55], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v96, v0, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v99, 0x7ff80000, v99, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v98, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v97, 0x7ff80000, v97, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v100, 0, v100, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v102, 0, v101, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v99, v3, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[6:7], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v97, v1, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[4:5], 64
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[10:11], v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v48, v8, s13
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[12:13], v[36:37]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s6, v[14:15], v[34:35]
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v64, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v65, s14
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[52:53], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v113, v36, v12, s15
+; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[52:53]
+; GFX10-NEXT: s_waitcnt vmcnt(19)
+; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[10:11], v[54:55]
+; GFX10-NEXT: s_waitcnt vmcnt(18)
+; GFX10-NEXT: v_max_f64 v[34:35], v[8:9], v[37:38]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[8:9], v[37:38]
+; GFX10-NEXT: s_waitcnt vmcnt(16)
+; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[64:65]
+; GFX10-NEXT: v_max_f64 v[36:37], v[10:11], v[54:55]
+; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[0:1], v[64:65]
+; GFX10-NEXT: v_max_f64 v[38:39], v[12:13], v[52:53]
+; GFX10-NEXT: v_max_f64 v[52:53], v[14:15], v[50:51]
+; GFX10-NEXT: s_waitcnt vmcnt(11)
+; GFX10-NEXT: v_max_f64 v[54:55], v[20:21], v[70:71]
+; GFX10-NEXT: v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
+; GFX10-NEXT: s_waitcnt vmcnt(9)
+; GFX10-NEXT: v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
+; GFX10-NEXT: s_waitcnt vmcnt(8)
+; GFX10-NEXT: v_max_f64 v[50:51], v[16:17], v[48:49]
+; GFX10-NEXT: v_cmp_u_f64_e64 s11, v[16:17], v[48:49]
+; GFX10-NEXT: v_max_f64 v[48:49], v[18:19], v[80:81]
+; GFX10-NEXT: v_max_f64 v[64:65], v[22:23], v[68:69]
+; GFX10-NEXT: v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
+; GFX10-NEXT: s_waitcnt vmcnt(7)
+; GFX10-NEXT: v_max_f64 v[68:69], v[24:25], v[66:67]
+; GFX10-NEXT: v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v36, 0, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v34, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v35, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v11, v37, 0x7ff80000, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v12, v38, 0, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, 0, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v16, v50, 0, s11
+; GFX10-NEXT: v_cndmask_b32_e64 v17, v51, 0x7ff80000, s11
+; GFX10-NEXT: v_cndmask_b32_e64 v18, v48, 0, s12
+; GFX10-NEXT: v_cndmask_b32_e64 v19, v49, 0x7ff80000, s12
+; GFX10-NEXT: v_cndmask_b32_e64 v20, v54, 0, s13
+; GFX10-NEXT: v_cndmask_b32_e64 v21, v55, 0x7ff80000, s13
+; GFX10-NEXT: v_cndmask_b32_e64 v22, v64, 0, s14
+; GFX10-NEXT: v_cndmask_b32_e64 v23, v65, 0x7ff80000, s14
+; GFX10-NEXT: v_cndmask_b32_e64 v24, v68, 0, s15
+; GFX10-NEXT: v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
+; GFX10-NEXT: s_waitcnt vmcnt(5)
+; GFX10-NEXT: v_max_f64 v[70:71], v[28:29], v[2:3]
+; GFX10-NEXT: v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(3)
+; GFX10-NEXT: v_max_f64 v[66:67], v[26:27], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e64 s16, v[26:27], v[4:5]
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v82, 0, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_o_f64_e64 s18, v[30:31], v[86:87]
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v54, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v54, v53, v5, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v55, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[50:51], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v55, v51, v7, s9
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[8:9], v[48:49]
-; GFX10-NEXT: v_cndmask_b32_e64 v101, 0x7ff80000, v54, s8
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[16:17], v[32:33]
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v102, v6, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v103, 0x7ff80000, v55, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v100, v4, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v101, v5, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[8:9], 64
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[10:11], v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v103, v7, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[48:49], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v38, v10, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v115, v34, v14, s6
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[16:17], v[32:33]
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v52, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v53, s14
-; GFX10-NEXT: v_cmp_gt_f64_e64 s14, v[18:19], v[82:83]
-; GFX10-NEXT: v_cndmask_b32_e64 v52, 0, v115, s16
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v50, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v49, v9, s13
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v51, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[38:39], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v54, 0, v112, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v51, v39, v11, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v50, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v50, 0, v113, s5
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[18:19], v[82:83]
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v54, v8, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v64, 0, v114, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v55, v9, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v65, 0x7ff80000, v51, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v48, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v64, v10, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v49, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[14:15], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v65, v11, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v37, v13, s15
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[34:35], 64
-; GFX10-NEXT: v_cmp_gt_f64_e64 s9, v[20:21], v[66:67]
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[20:21], v[66:67]
-; GFX10-NEXT: v_cndmask_b32_e64 v116, v32, v16, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v48, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v38, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v39, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[22:23], v[68:69]
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v35, v15, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v49, v82, v18, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v48, 0, v116, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[36:37], 64
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[22:23], v[68:69]
-; GFX10-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v38, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v50, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v51, v13, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[16:17], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v38, 0, v49, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v83, v19, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, v14, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, v15, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[32:33], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[18:19], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v67, v21, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v34, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v35, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v33, v17, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v112, s4
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[24:25], v[70:71]
-; GFX10-NEXT: v_cndmask_b32_e32 v113, v69, v23, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v35, v68, v22, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[20:21], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v34, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v34, 0x7ff80000, v114, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v36, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v37, s13
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[82:83], 64
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[24:25], v[70:71]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v48, v16, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v49, v17, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v36, 0x7ff80000, v113, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v35, 0, v35, s5
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[26:27], v[80:81]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v32, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v66, v20, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v33, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v38, v18, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v39, v19, s14
-; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[26:27], v[80:81]
-; GFX10-NEXT: v_cndmask_b32_e64 v33, 0, v32, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v71, v25, s4
-; GFX10-NEXT: v_cmp_gt_f64_e64 s16, v[28:29], v[84:85]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[28:29], v[84:85]
-; GFX10-NEXT: v_cndmask_b32_e32 v21, v34, v21, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v20, v33, v20, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 64
-; GFX10-NEXT: v_cmp_gt_f64_e64 s17, v[30:31], v[86:87]
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[70:71], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v82, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v82, v70, v24, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v19, v83, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v83, 0x7ff80000, v112, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[68:69], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[96:97]
-; GFX10-NEXT: v_cndmask_b32_e64 v82, 0, v82, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v37, v81, v27, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v80, v26, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[80:81], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[84:85], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[98:99]
-; GFX10-NEXT: v_cndmask_b32_e64 v113, 0x7ff80000, v37, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v112, 0, v32, s15
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[100:101]
-; GFX10-NEXT: v_cndmask_b32_e64 v115, v85, v29, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v84, v28, s16
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[102:103]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[54:55]
-; GFX10-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[24:25], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v115, 0x7ff80000, v115, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v114, 0, v114, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v116, v87, v31, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v86, v30, s17
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[86:87], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[64:65]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s15, 0, v[50:51]
-; GFX10-NEXT: v_cndmask_b32_e64 v117, 0x7ff80000, v116, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v116, 0, v32, s18
-; GFX10-NEXT: v_cmp_eq_f64_e64 s16, 0, v[52:53]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s17, 0, v[48:49]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s18, 0, v[38:39]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s19, 0, v[33:34]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s20, 0, v[35:36]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s21, 0, v[82:83]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s22, 0, v[112:113]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s23, 0, v[114:115]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s24, 0, v[116:117]
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v22, v68, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v23, v69, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v96, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e32 v24, v82, v24, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v25, v83, v25, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[26:27], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v98, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v100, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v24, v70, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v25, v25, v71, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v102, v6, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v54, v8, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v64, v10, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v50, v12, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, v14, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v48, v16, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v38, v18, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v35, v22, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v82, v24, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v97, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v99, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v101, v5, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v103, v7, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v55, v9, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v65, v11, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v51, v13, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, v15, s16
-; GFX10-NEXT: v_cndmask_b32_e32 v26, v112, v26, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v27, v113, v27, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[28:29], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v49, v17, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v39, v19, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v26, v26, v80, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v27, v27, v81, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v36, v23, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v25, v83, v25, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v26, v112, v26, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v27, v113, v27, s22
-; GFX10-NEXT: v_cndmask_b32_e32 v28, v114, v28, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v29, v115, v29, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[30:31], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v28, v28, v84, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v29, v29, v85, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v28, v114, v28, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v29, v115, v29, s23
-; GFX10-NEXT: v_cndmask_b32_e32 v30, v116, v30, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v31, v117, v31, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[66:67], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v30, v30, v86, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v31, v31, v87, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v30, v116, v30, s24
-; GFX10-NEXT: v_cndmask_b32_e64 v31, v117, v31, s24
-; GFX10-NEXT: v_cndmask_b32_e32 v20, v20, v66, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v21, v21, v67, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v33, v20, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v34, v21, s19
+; GFX10-NEXT: v_max_f64 v[80:81], v[30:31], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s18, v[30:31], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v83, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v84, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v85, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v32, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v33, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v28, v70, 0, s17
+; GFX10-NEXT: v_cndmask_b32_e64 v29, v71, 0x7ff80000, s17
+; GFX10-NEXT: v_cndmask_b32_e64 v26, v66, 0, s16
+; GFX10-NEXT: v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
+; GFX10-NEXT: v_cndmask_b32_e64 v30, v80, 0, s18
+; GFX10-NEXT: v_cndmask_b32_e64 v31, v81, 0x7ff80000, s18
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v16f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1f
-; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:8
-; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:4
-; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:16
-; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:12
-; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24
-; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
-; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
-; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28
-; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:40
-; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:36
-; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:48
-; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:44
-; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:56
-; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:52
-; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:64
-; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:60
-; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:72
-; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:68
-; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:80
-; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:76
-; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:88
-; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:84
-; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:96
-; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:92
-; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:104
-; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:100
-; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:112
-; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:108
-; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:120
-; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:116
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:128
-; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:124
-; GFX11-NEXT: s_waitcnt vmcnt(31)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s9, v[0:1], v[86:87]
-; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[0:1], v[86:87]
-; GFX11-NEXT: s_waitcnt vmcnt(29)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s10, v[2:3], v[84:85]
-; GFX11-NEXT: v_cmp_class_f64_e64 s14, v[86:87], 64
-; GFX11-NEXT: s_waitcnt vmcnt(27)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[4:5], v[32:33]
-; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[4:5], v[32:33]
-; GFX11-NEXT: s_waitcnt vmcnt(25)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[6:7], v[34:35]
-; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[2:3], v[84:85]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[6:7], v[34:35]
-; GFX11-NEXT: s_waitcnt vmcnt(23)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s4, v[8:9], v[36:37]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[8:9], v[36:37]
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, v[84:85], 64
-; GFX11-NEXT: s_waitcnt vmcnt(21)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s6, v[10:11], v[38:39]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[10:11], v[38:39]
-; GFX11-NEXT: s_waitcnt vmcnt(19)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s8, v[12:13], v[48:49]
-; GFX11-NEXT: v_cmp_o_f64_e64 s7, v[12:13], v[48:49]
-; GFX11-NEXT: s_waitcnt vmcnt(17)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s13, v[14:15], v[50:51]
-; GFX11-NEXT: s_waitcnt vmcnt(15)
-; GFX11-NEXT: v_cmp_o_f64_e64 s15, v[16:17], v[52:53]
-; GFX11-NEXT: s_waitcnt vmcnt(13)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s17, v[18:19], v[54:55]
-; GFX11-NEXT: v_cmp_o_f64_e64 s18, v[18:19], v[54:55]
-; GFX11-NEXT: s_waitcnt vmcnt(11)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s19, v[20:21], v[64:65]
-; GFX11-NEXT: v_cmp_o_f64_e64 s20, v[20:21], v[64:65]
-; GFX11-NEXT: s_waitcnt vmcnt(9)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s21, v[22:23], v[66:67]
-; GFX11-NEXT: v_cmp_o_f64_e64 s22, v[22:23], v[66:67]
-; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s23, v[24:25], v[68:69]
-; GFX11-NEXT: v_cmp_o_f64_e64 s24, v[24:25], v[68:69]
-; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s25, v[26:27], v[70:71]
-; GFX11-NEXT: v_cmp_o_f64_e64 s26, v[26:27], v[70:71]
-; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s27, v[28:29], v[80:81]
-; GFX11-NEXT: v_cmp_o_f64_e64 s28, v[28:29], v[80:81]
+; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
+; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
+; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
+; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
+; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
+; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
+; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40
+; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36
+; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48
+; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44
+; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56
+; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52
+; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64
+; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60
+; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:72
+; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:68
+; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:80
+; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:76
+; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:88
+; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:84
+; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:96
+; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:92
+; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:104
+; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:100
+; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:112
+; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:108
+; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:120
+; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:116
+; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:128
+; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:124
+; GFX11-NEXT: s_waitcnt vmcnt(30)
+; GFX11-NEXT: v_max_f64 v[96:97], v[0:1], v[32:33]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
+; GFX11-NEXT: s_waitcnt vmcnt(28)
+; GFX11-NEXT: v_max_f64 v[32:33], v[2:3], v[34:35]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
+; GFX11-NEXT: s_waitcnt vmcnt(26)
+; GFX11-NEXT: v_max_f64 v[34:35], v[4:5], v[36:37]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
+; GFX11-NEXT: s_waitcnt vmcnt(24)
+; GFX11-NEXT: v_max_f64 v[36:37], v[6:7], v[38:39]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
+; GFX11-NEXT: s_waitcnt vmcnt(22)
+; GFX11-NEXT: v_max_f64 v[38:39], v[8:9], v[48:49]
+; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
+; GFX11-NEXT: s_waitcnt vmcnt(20)
+; GFX11-NEXT: v_max_f64 v[48:49], v[10:11], v[50:51]
+; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
+; GFX11-NEXT: s_waitcnt vmcnt(18)
+; GFX11-NEXT: v_max_f64 v[50:51], v[12:13], v[52:53]
+; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
+; GFX11-NEXT: s_waitcnt vmcnt(16)
+; GFX11-NEXT: v_max_f64 v[52:53], v[14:15], v[54:55]
+; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
+; GFX11-NEXT: s_waitcnt vmcnt(14)
+; GFX11-NEXT: v_max_f64 v[54:55], v[16:17], v[64:65]
+; GFX11-NEXT: v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
+; GFX11-NEXT: s_waitcnt vmcnt(12)
+; GFX11-NEXT: v_max_f64 v[64:65], v[18:19], v[66:67]
+; GFX11-NEXT: v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
+; GFX11-NEXT: s_waitcnt vmcnt(10)
+; GFX11-NEXT: v_max_f64 v[66:67], v[20:21], v[68:69]
+; GFX11-NEXT: v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
+; GFX11-NEXT: s_waitcnt vmcnt(8)
+; GFX11-NEXT: v_max_f64 v[68:69], v[22:23], v[70:71]
+; GFX11-NEXT: v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
+; GFX11-NEXT: s_waitcnt vmcnt(6)
+; GFX11-NEXT: v_max_f64 v[70:71], v[24:25], v[80:81]
+; GFX11-NEXT: v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
+; GFX11-NEXT: s_waitcnt vmcnt(4)
+; GFX11-NEXT: v_max_f64 v[80:81], v[26:27], v[82:83]
+; GFX11-NEXT: v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
+; GFX11-NEXT: s_waitcnt vmcnt(2)
+; GFX11-NEXT: v_max_f64 v[82:83], v[28:29], v[84:85]
+; GFX11-NEXT: v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s29, v[30:31], v[82:83]
-; GFX11-NEXT: v_cmp_o_f64_e64 vcc_hi, v[30:31], v[82:83]
-; GFX11-NEXT: v_cndmask_b32_e64 v96, v87, v1, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v101, v86, v0, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v98, v85, v3, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v103, v84, v2, s10
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[0:1], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v97, 0x7ff80000, v96, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v96, 0, v101, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v100, v33, v5, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v102, v35, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v99, 0x7ff80000, v98, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v98, 0, v103, s12
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, v[2:3], 64
-; GFX11-NEXT: v_cndmask_b32_e32 v101, 0x7ff80000, v100, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v103, 0x7ff80000, v102, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v112, v37, v9, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v114, v39, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v116, v49, v13, s8
-; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[14:15], v[50:51]
-; GFX11-NEXT: v_cndmask_b32_e64 v118, v51, v15, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v113, 0x7ff80000, v112, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v115, 0x7ff80000, v114, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v117, 0x7ff80000, v116, s7
-; GFX11-NEXT: v_cmp_gt_f64_e64 s12, v[16:17], v[52:53]
-; GFX11-NEXT: v_cndmask_b32_e64 v130, v55, v19, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v132, v65, v21, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v134, v67, v23, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v144, v69, v25, s23
-; GFX11-NEXT: v_cndmask_b32_e64 v145, v71, v27, s25
-; GFX11-NEXT: v_cndmask_b32_e64 v131, 0x7ff80000, v130, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v133, 0x7ff80000, v132, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v135, 0x7ff80000, v134, s22
-; GFX11-NEXT: v_cndmask_b32_e64 v146, v81, v29, s27
-; GFX11-NEXT: v_cndmask_b32_e64 v148, v80, v28, s27
-; GFX11-NEXT: v_cndmask_b32_e64 v147, v83, v31, s29
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v147, 0x7ff80000, v147, vcc_hi
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, v0, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, v1, s10
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[36:37], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v86, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v32, v4, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v87, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v34, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v98, v2, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v99, v3, s11
-; GFX11-NEXT: v_cndmask_b32_e32 v100, 0, v86, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[4:5], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v102, 0, v87, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v84, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v36, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v38, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v48, v12, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v119, 0x7ff80000, v118, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v128, v53, v17, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v112, 0, v84, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v114, 0, v86, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v116, 0, v87, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v50, v14, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v129, 0x7ff80000, v128, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v52, v16, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v54, v18, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v85, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v118, 0, v84, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v64, v20, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v128, 0, v86, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v130, 0, v87, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v66, v22, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v85, 0x7ff80000, v144, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v132, 0, v84, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v68, v24, s23
-; GFX11-NEXT: v_cndmask_b32_e64 v144, v70, v26, s25
-; GFX11-NEXT: v_cndmask_b32_e64 v134, 0, v86, s22
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[68:69], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[70:71], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v84, 0, v87, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v87, 0x7ff80000, v145, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v86, 0, v144, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v145, 0x7ff80000, v146, s28
-; GFX11-NEXT: v_cndmask_b32_e64 v144, 0, v148, s28
-; GFX11-NEXT: v_cndmask_b32_e64 v146, v82, v30, s29
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[80:81], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[82:83], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[32:33], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s8, v[34:35], 64
-; GFX11-NEXT: v_dual_cndmask_b32 v5, v101, v5 :: v_dual_cndmask_b32 v4, v100, v4
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[6:7], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v146, 0, v146, vcc_hi
-; GFX11-NEXT: v_cmp_class_f64_e64 s12, v[38:39], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s14, v[48:49], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, v[50:51], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s18, v[52:53], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s20, v[54:55], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s21, v[64:65], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[96:97]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[98:99]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[100:101]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[102:103]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s11, 0, v[112:113]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s13, 0, v[114:115]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s15, 0, v[116:117]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s17, 0, v[118:119]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s19, 0, v[128:129]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s22, 0, v[130:131]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s23, 0, v[132:133]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s24, 0, v[134:135]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s25, 0, v[84:85]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s26, 0, v[86:87]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s27, 0, v[144:145]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s28, 0, v[146:147]
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v33, s6
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v103, v7 :: v_dual_cndmask_b32 v6, v102, v6
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[8:9], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v32, s6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v35, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, v0, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v98, v2, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v100, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, v1, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v99, v3, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v101, v5, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v103, v7, s9
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v113, v9 :: v_dual_cndmask_b32 v8, v112, v8
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[10:11], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v34, s8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v37, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v102, v6, s9
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v113, v9, s11
-; GFX11-NEXT: v_dual_cndmask_b32 v11, v115, v11 :: v_dual_cndmask_b32 v10, v114, v10
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[12:13], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v36, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v39, s12
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v112, v8, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v115, v11, s13
-; GFX11-NEXT: v_dual_cndmask_b32 v13, v117, v13 :: v_dual_cndmask_b32 v12, v116, v12
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[14:15], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v38, s12
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v49, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v114, v10, s13
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v117, v13, s15
-; GFX11-NEXT: v_dual_cndmask_b32 v15, v119, v15 :: v_dual_cndmask_b32 v14, v118, v14
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[16:17], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v48, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v51, s16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v116, v12, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v119, v15, s17
-; GFX11-NEXT: v_dual_cndmask_b32 v17, v129, v17 :: v_dual_cndmask_b32 v16, v128, v16
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[18:19], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v50, s16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v53, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v118, v14, s17
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v129, v17, s19
-; GFX11-NEXT: v_dual_cndmask_b32 v19, v131, v19 :: v_dual_cndmask_b32 v18, v130, v18
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[20:21], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, v52, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v19, v55, s20
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v128, v16, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v131, v19, s22
-; GFX11-NEXT: v_dual_cndmask_b32 v21, v133, v21 :: v_dual_cndmask_b32 v20, v132, v20
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v18, v54, s20
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v21, v65, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v130, v18, s22
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v133, v21, s23
-; GFX11-NEXT: v_dual_cndmask_b32 v23, v135, v23 :: v_dual_cndmask_b32 v22, v134, v22
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[24:25], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v20, v64, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v132, v20, s23
-; GFX11-NEXT: v_dual_cndmask_b32 v25, v85, v25 :: v_dual_cndmask_b32 v24, v84, v24
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[26:27], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v25, v25, v69, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v25, v85, v25, s25
-; GFX11-NEXT: v_dual_cndmask_b32 v27, v87, v27 :: v_dual_cndmask_b32 v26, v86, v26
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[28:29], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v24, v68, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v27, v27, v71, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v84, v24, s25
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v27, v87, v27, s26
-; GFX11-NEXT: v_dual_cndmask_b32 v29, v145, v29 :: v_dual_cndmask_b32 v28, v144, v28
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[30:31], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v26, v26, v70, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v29, v29, v81, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v26, v86, v26, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v29, v145, v29, s27
-; GFX11-NEXT: v_dual_cndmask_b32 v31, v147, v31 :: v_dual_cndmask_b32 v30, v146, v30
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[66:67], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v28, v28, v80, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v31, v31, v83, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v28, v144, v28, s27
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v31, v147, v31, s28
-; GFX11-NEXT: v_dual_cndmask_b32 v23, v23, v67 :: v_dual_cndmask_b32 v22, v22, v66
-; GFX11-NEXT: v_cndmask_b32_e64 v30, v30, v82, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v23, v135, v23, s24
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v134, v22, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v30, v146, v30, s28
+; GFX11-NEXT: v_max_f64 v[84:85], v[30:31], v[86:87]
+; GFX11-NEXT: v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v32, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v34, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v36, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v38, 0, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v10, v48, 0, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v50, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v14, v52, 0, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v16, v54, 0, s7
+; GFX11-NEXT: v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
+; GFX11-NEXT: v_cndmask_b32_e64 v18, v64, 0, s8
+; GFX11-NEXT: v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
+; GFX11-NEXT: v_cndmask_b32_e64 v20, v66, 0, s9
+; GFX11-NEXT: v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
+; GFX11-NEXT: v_cndmask_b32_e64 v22, v68, 0, s10
+; GFX11-NEXT: v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
+; GFX11-NEXT: v_cndmask_b32_e64 v24, v70, 0, s11
+; GFX11-NEXT: v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
+; GFX11-NEXT: v_cndmask_b32_e64 v26, v80, 0, s12
+; GFX11-NEXT: v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
+; GFX11-NEXT: v_cndmask_b32_e64 v28, v82, 0, s13
+; GFX11-NEXT: v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
+; GFX11-NEXT: v_cndmask_b32_e64 v30, v84, 0, s14
+; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index 95d351e..e00ebff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -443,28 +443,14 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX8-NEXT: v_min_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
+; GFX8-NEXT: v_min_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -543,26 +529,9 @@ define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX8-LABEL: v_minimum_v2f16__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
-; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f16__nnan:
@@ -608,13 +577,11 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX8-NEXT: v_min_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
+; GFX8-NEXT: v_min_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
@@ -696,14 +663,9 @@ define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX8-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f16__nnan_nsz:
@@ -750,31 +712,15 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-NEXT: s_lshr_b32 s6, s5, 16
; GFX8-NEXT: s_lshr_b32 s7, s4, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NEXT: v_mov_b32_e32 v1, s7
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, s7, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX8-NEXT: v_min_f16_e32 v1, s7, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s7, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v2
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX8-NEXT: v_mov_b32_e32 v1, s5
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, s4, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v1, v2, vcc
+; GFX8-NEXT: v_min_f16_e32 v3, s4, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s4, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 1da2647..e0566820 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -495,167 +495,73 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX7-LABEL: v_minimum_v2f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v4, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v1, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX7-NEXT: v_min_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX8-NEXT: v_min_f32_e32 v2, v1, v3
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v5, v1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v4, v0, v2 :: v_dual_min_f32 v5, v1, v3
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32:
@@ -676,136 +582,42 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX7-LABEL: v_minimum_v2f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v0, v2
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v1, v3
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v3
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32__nnan:
@@ -826,11 +638,11 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX7-LABEL: v_minimum_v2f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v4, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v1, v3
+; GFX7-NEXT: v_min_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -838,13 +650,11 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX8-LABEL: v_minimum_v2f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v2, v1, v3
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -852,13 +662,11 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX9-LABEL: v_minimum_v2f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -866,16 +674,12 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX940-LABEL: v_minimum_v2f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -884,11 +688,9 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX10-LABEL: v_minimum_v2f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
+; GFX10-NEXT: v_min_f32_e32 v5, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
@@ -897,12 +699,9 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX11-LABEL: v_minimum_v2f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v4, v0, v2 :: v_dual_min_f32 v5, v1, v3
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
@@ -926,55 +725,42 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX7-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v3
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32__nnan_nsz:
@@ -996,28 +782,14 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s7
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, s5, v0
+; GFX7-NEXT: v_min_f32_e32 v1, s5, v0
; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX7-NEXT: v_mov_b32_e32 v0, s6
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, s4, v0
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_min_f32_e32 v3, s4, v0
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX7-NEXT: v_mov_b32_e32 v3, s4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use v[0:1]
; GFX7-NEXT: ;;#ASMEND
@@ -1027,30 +799,14 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s7
-; GFX8-NEXT: v_mov_b32_e32 v1, s5
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX8-NEXT: v_min_f32_e32 v1, s5, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX8-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, s4, v0
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
@@ -1060,30 +816,14 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s7
-; GFX9-NEXT: v_mov_b32_e32 v1, s5
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-NEXT: v_min_f32_e32 v1, s5, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX9-NEXT: v_mov_b32_e32 v0, s6
-; GFX9-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, s4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
@@ -1093,40 +833,15 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v0, s3
-; GFX940-NEXT: v_mov_b32_e32 v1, s1
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, s1, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX940-NEXT: v_mov_b32_e32 v0, s2
-; GFX940-NEXT: v_mov_b32_e32 v2, s0
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, s0, v0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use v[0:1]
; GFX940-NEXT: ;;#ASMEND
@@ -1135,28 +850,12 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX10-LABEL: s_minimum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s5
-; GFX10-NEXT: v_cmp_lt_f32_e64 vcc_lo, s5, s7
-; GFX10-NEXT: v_mov_b32_e32 v1, s4
-; GFX10-NEXT: v_cmp_class_f32_e64 s8, s5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, s7, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e64 vcc_lo, s4, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e64 v0, s5, s7
; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s5, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX10-NEXT: v_min_f32_e64 v2, s4, s6
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s4, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v0, s5, s8
-; GFX10-NEXT: v_cmp_class_f32_e64 s5, s4, 32
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v2, s4, s5
-; GFX10-NEXT: v_cmp_class_f32_e64 s4, s7, 32
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s7, s4
-; GFX10-NEXT: v_cmp_class_f32_e64 s4, s6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s6, s4
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v[0:1]
; GFX10-NEXT: ;;#ASMEND
@@ -1165,32 +864,13 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX11-LABEL: s_minimum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
-; GFX11-NEXT: v_cmp_lt_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT: v_cmp_class_f32_e64 s4, s1, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
+; GFX11-NEXT: v_min_f32_e64 v0, s1, s3
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT: v_min_f32_e64 v2, s0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v0, s1, s4
-; GFX11-NEXT: v_cmp_class_f32_e64 s1, s0, 32
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v2, s0, s1
-; GFX11-NEXT: v_cmp_class_f32_e64 s0, s3, 32
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0
-; GFX11-NEXT: v_cmp_class_f32_e64 s0, s2, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s2, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v[0:1]
; GFX11-NEXT: ;;#ASMEND
@@ -1218,227 +898,92 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX7-LABEL: v_minimum_v3f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v6, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v1, v4
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX7-NEXT: v_min_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v2, v5
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX7-NEXT: v_min_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v6, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, v1, v4
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, v2, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v6, v0, v3
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v6, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v7, v1, v4
+; GFX10-NEXT: v_min_f32_e32 v8, v2, v5
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v6, v0, v3 :: v_dual_min_f32 v7, v1, v4
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v6, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v7, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32:
@@ -1460,184 +1005,48 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX7-LABEL: v_minimum_v3f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v6, v0, v3
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v1, v4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v2, v5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX940-NEXT: v_min_f32_e32 v2, v2, v5
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
+; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32__nnan:
@@ -1659,14 +1068,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX7-LABEL: v_minimum_v3f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v6, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v2, v5
+; GFX7-NEXT: v_min_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -1674,17 +1083,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX8-LABEL: v_minimum_v3f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v6, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, v1, v4
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, v2, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -1692,17 +1098,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX9-LABEL: v_minimum_v3f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -1710,22 +1113,16 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX940-LABEL: v_minimum_v3f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
@@ -1734,13 +1131,10 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX10-LABEL: v_minimum_v3f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v6, v0, v3
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX10-NEXT: v_min_f32_e32 v7, v1, v4
+; GFX10-NEXT: v_min_f32_e32 v8, v2, v5
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
@@ -1751,17 +1145,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX11-LABEL: v_minimum_v3f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v6, v0, v3 :: v_dual_min_f32 v7, v1, v4
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
@@ -1784,67 +1175,48 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX7-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v2, v5
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX940-NEXT: v_min_f32_e32 v2, v2, v5
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
+; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32__nnan_nsz:
@@ -1866,292 +1238,111 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX7-LABEL: v_minimum_v4f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v1, v5
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v2, v6
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX7-NEXT: v_min_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v3, v7
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX7-NEXT: v_min_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v0, v4
; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v1, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v2, v6
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v3, v7
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v0, v4
; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v2, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v3, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
+; GFX940-NEXT: v_min_f32_e32 v8, v0, v4
; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v2, v6
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v3, v7
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v0, v4
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v9, v1, v5
+; GFX10-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v3, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v0, v4 :: v_dual_min_f32 v9, v1, v5
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX11-NEXT: v_dual_min_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32:
@@ -2174,236 +1365,53 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX7-LABEL: v_minimum_v4f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v0, v4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v1, v5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v2, v6
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v3, v7
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX940-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX940-NEXT: v_min_f32_e32 v3, v3, v7
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v7, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_min_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v7, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
+; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nnan:
@@ -2426,17 +1434,17 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX7-LABEL: v_minimum_v4f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v3, v7
+; GFX7-NEXT: v_min_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -2444,21 +1452,17 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX8-LABEL: v_minimum_v4f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v0, v4
; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v1, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v2, v6
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v3, v7
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -2466,21 +1470,17 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX9-LABEL: v_minimum_v4f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v0, v4
; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v2, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v3, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2488,28 +1488,20 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX940-LABEL: v_minimum_v4f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
+; GFX940-NEXT: v_min_f32_e32 v8, v0, v4
; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v2, v6
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v3, v7
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
@@ -2518,44 +1510,35 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX10-LABEL: v_minimum_v4f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v0, v4
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
+; GFX10-NEXT: v_min_f32_e32 v9, v1, v5
+; GFX10-NEXT: v_min_f32_e32 v4, v2, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v7, v3, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v4, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v3, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v0, v4 :: v_dual_min_f32 v9, v1, v5
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v7, v3, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v4, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX11-NEXT: v_dual_min_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nsz:
@@ -2578,79 +1561,53 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX7-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v3, v7
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX940-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX940-NEXT: v_min_f32_e32 v3, v3, v7
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_min_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
+; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nnan_nsz:
@@ -2673,551 +1630,185 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX7-LABEL: v_minimum_v8f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v0, v8
+; GFX7-NEXT: v_min_f32_e32 v16, v0, v8
; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v1, v9
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v1, v9
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v2, v10
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v2, v10
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v3, v11
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v3, v11
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v4, v12
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v4, v12
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v5, v13
+; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v5, v13
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v6, v14
+; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v6, v14
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v7, v15
+; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v7, v15
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v8f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v16, v0, v8
; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v9
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v1, v9
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v10
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v2, v10
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v3, v11
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v12
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v4, v12
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v13
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v5, v13
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v14
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v6, v14
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v15
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v7, v15
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v8f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v16, v0, v8
; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v1, v9
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v2, v10
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v3, v11
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v4, v12
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v5, v13
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v6, v14
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v7, v15
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v8f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v8
+; GFX940-NEXT: v_min_f32_e32 v16, v0, v8
; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v9
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v1, v9
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v10
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v2, v10
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v11
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v3, v11
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v4, v12
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v4, v12
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v5, v13
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v5, v13
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v6, v14
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v6, v14
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v7, v15
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v7, v15
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v16, v0, v8
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v17, v1, v9
+; GFX10-NEXT: v_min_f32_e32 v8, v2, v10
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v17, 0x7fc00000, v17, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v12, v4, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v9, v3, v11
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v10, v7, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v4, v12
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v9, v5, v13
+; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v6, v14
+; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v16, v0, v8 :: v_dual_min_f32 v17, v1, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v17, 0x7fc00000, v17, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v12, v4, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v9, v3, v11 :: v_dual_min_f32 v8, v2, v10
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v10, v7, v15
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_min_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v8f32:
@@ -3244,1071 +1835,371 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-LABEL: v_minimum_v16f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v32, v0, v16
-; GFX7-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v16, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v1, v17
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-NEXT: v_writelane_b32 v31, s30, 0
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v2, v18
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v18, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v3, v19
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v19, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v4, v20
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v20, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v5, v21
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v21, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v6, v22
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v22, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v7, v23
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v23, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v8, v24
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v24, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v9, v25
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v25, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v10, v26
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v26, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v11, v27
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v27, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v12, v28
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v28, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v13, v29
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v29, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v14, v30
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v30, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v17
+; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v18
+; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX7-NEXT: v_min_f32_e32 v18, v13, v29
+; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX7-NEXT: v_writelane_b32 v31, s31, 1
+; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v19
+; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX7-NEXT: v_min_f32_e32 v4, v4, v20
+; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX7-NEXT: v_min_f32_e32 v5, v5, v21
+; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX7-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX7-NEXT: v_min_f32_e32 v7, v7, v23
+; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX7-NEXT: v_min_f32_e32 v8, v8, v24
+; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX7-NEXT: v_min_f32_e32 v9, v9, v25
+; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX7-NEXT: v_min_f32_e32 v10, v10, v26
+; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX7-NEXT: v_min_f32_e32 v11, v11, v27
+; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX7-NEXT: v_min_f32_e32 v12, v12, v28
+; GFX7-NEXT: v_min_f32_e32 v19, v14, v30
+; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX7-NEXT: v_readlane_b32 s31, v31, 1
+; GFX7-NEXT: v_readlane_b32 s30, v31, 0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_min_f32_e32 v18, v15, v16
+; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v15, v17
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v16f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc
-; GFX8-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v16, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX8-NEXT: v_writelane_b32 v31, s30, 0
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v18, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v19
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v19, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v20
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v20, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v21
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v21, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v22
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v22, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v23
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v23, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v8, v24
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v24, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v9, v25
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v25, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v10, v26
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v26, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v11, v27
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v27, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v12, v28
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v28, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v13, v29
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v29, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v14, v30
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v30, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v17
+; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v18
+; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX8-NEXT: v_min_f32_e32 v18, v13, v29
+; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX8-NEXT: v_writelane_b32 v31, s31, 1
+; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX8-NEXT: v_min_f32_e32 v3, v3, v19
+; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX8-NEXT: v_min_f32_e32 v4, v4, v20
+; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX8-NEXT: v_min_f32_e32 v5, v5, v21
+; GFX8-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX8-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX8-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX8-NEXT: v_min_f32_e32 v7, v7, v23
+; GFX8-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX8-NEXT: v_min_f32_e32 v8, v8, v24
+; GFX8-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX8-NEXT: v_min_f32_e32 v9, v9, v25
+; GFX8-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX8-NEXT: v_min_f32_e32 v10, v10, v26
+; GFX8-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX8-NEXT: v_min_f32_e32 v11, v11, v27
+; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX8-NEXT: v_min_f32_e32 v12, v12, v28
+; GFX8-NEXT: v_min_f32_e32 v19, v14, v30
+; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX8-NEXT: v_readlane_b32 s31, v31, 1
+; GFX8-NEXT: v_readlane_b32 s30, v31, 0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_min_f32_e32 v18, v15, v16
+; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v15, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v16f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v16, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX9-NEXT: v_writelane_b32 v31, s30, 0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v18
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v18, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v19
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v19, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v4, v20
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v20, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v5, v21
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v21, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v6, v22
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v22, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v7, v23
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v23, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v8, v24
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v24, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v9, v25
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v25, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v10, v26
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v26, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v11, v27
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v27, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v12, v28
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v28, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v13, v29
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v29, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v30, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v17
+; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v18
+; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX9-NEXT: v_min_f32_e32 v18, v13, v29
+; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX9-NEXT: v_writelane_b32 v31, s31, 1
+; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX9-NEXT: v_min_f32_e32 v3, v3, v19
+; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX9-NEXT: v_min_f32_e32 v4, v4, v20
+; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX9-NEXT: v_min_f32_e32 v5, v5, v21
+; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX9-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX9-NEXT: v_min_f32_e32 v7, v7, v23
+; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX9-NEXT: v_min_f32_e32 v8, v8, v24
+; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX9-NEXT: v_min_f32_e32 v9, v9, v25
+; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX9-NEXT: v_min_f32_e32 v10, v10, v26
+; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX9-NEXT: v_min_f32_e32 v11, v11, v27
+; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX9-NEXT: v_min_f32_e32 v12, v12, v28
+; GFX9-NEXT: v_min_f32_e32 v19, v14, v30
+; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX9-NEXT: v_readlane_b32 s31, v31, 1
+; GFX9-NEXT: v_readlane_b32 s30, v31, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_min_f32_e32 v18, v15, v16
+; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v15, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v16f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v16
; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v16, v0, vcc
+; GFX940-NEXT: v_min_f32_e32 v33, v0, v16
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v32, v33, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v16, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v33
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v17
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v34, v1, v17
+; GFX940-NEXT: v_min_f32_e32 v35, v2, v18
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v18
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v36, v3, v19
+; GFX940-NEXT: v_min_f32_e32 v37, v4, v20
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v18, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v19
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v38, v5, v21
+; GFX940-NEXT: v_min_f32_e32 v39, v6, v22
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v19, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v4, v20
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v48, v7, v23
+; GFX940-NEXT: v_min_f32_e32 v49, v8, v24
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v20, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v5, v21
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
+; GFX940-NEXT: v_min_f32_e32 v50, v9, v25
+; GFX940-NEXT: v_min_f32_e32 v51, v10, v26
+; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v21, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v6, v22
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
+; GFX940-NEXT: v_min_f32_e32 v52, v11, v27
+; GFX940-NEXT: v_min_f32_e32 v53, v12, v28
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v22, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
+; GFX940-NEXT: v_min_f32_e32 v54, v13, v29
+; GFX940-NEXT: v_min_f32_e32 v55, v14, v30
+; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v23, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v8, v24
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v16, v15, v31
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v24, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v9, v25
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v25, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v10, v26
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v26, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v11, v27
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v27, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v12, v28
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v28, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v13, v29
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v29, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v14, v30
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v30, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v15, v31
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v31, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v16
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT: v_cndmask_b32_e32 v34, v18, v2, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT: v_cndmask_b32_e32 v35, v19, v3, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT: v_cndmask_b32_e32 v36, v20, v4, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT: v_cndmask_b32_e32 v37, v21, v5, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT: v_cndmask_b32_e32 v38, v22, v6, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT: v_cndmask_b32_e32 v39, v23, v7, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT: v_cndmask_b32_e32 v48, v24, v8, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT: v_cndmask_b32_e32 v49, v25, v9, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT: v_cndmask_b32_e32 v50, v26, v10, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT: v_cndmask_b32_e32 v51, v27, v11, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT: v_cndmask_b32_e32 v52, v28, v12, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT: v_cndmask_b32_e32 v53, v29, v13, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT: v_cndmask_b32_e32 v54, v30, v14, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v32, v0, v16
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v32, 0x7fc00000, v32, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v33, v1, v17
+; GFX10-NEXT: v_min_f32_e32 v34, v2, v18
+; GFX10-NEXT: v_min_f32_e32 v35, v3, v19
+; GFX10-NEXT: v_min_f32_e32 v36, v4, v20
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v33, 0x7fc00000, v33, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v37, v5, v21
+; GFX10-NEXT: v_min_f32_e32 v38, v6, v22
+; GFX10-NEXT: v_min_f32_e32 v39, v7, v23
+; GFX10-NEXT: v_min_f32_e32 v48, v8, v24
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT: v_cndmask_b32_e32 v34, 0x7fc00000, v34, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v49, v9, v25
+; GFX10-NEXT: v_min_f32_e32 v50, v10, v26
+; GFX10-NEXT: v_min_f32_e32 v51, v11, v27
+; GFX10-NEXT: v_min_f32_e32 v52, v12, v28
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT: v_cndmask_b32_e32 v35, 0x7fc00000, v35, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v53, v13, v29
+; GFX10-NEXT: v_min_f32_e32 v54, v14, v30
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT: v_cndmask_b32_e32 v36, 0x7fc00000, v36, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT: v_cndmask_b32_e32 v37, 0x7fc00000, v37, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT: v_cndmask_b32_e32 v38, 0x7fc00000, v38, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT: v_cndmask_b32_e32 v39, 0x7fc00000, v39, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT: v_cndmask_b32_e32 v48, 0x7fc00000, v48, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT: v_cndmask_b32_e32 v49, 0x7fc00000, v49, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT: v_cndmask_b32_e32 v50, 0x7fc00000, v50, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT: v_cndmask_b32_e32 v51, 0x7fc00000, v51, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT: v_cndmask_b32_e32 v52, 0x7fc00000, v52, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT: v_cndmask_b32_e32 v53, 0x7fc00000, v53, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT: v_cndmask_b32_e32 v54, 0x7fc00000, v54, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v16, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v17, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v18, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v19, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v20, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v21, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v22, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v23, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v24, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v25, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v26, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v27, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v28, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v29, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v30, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v33
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v35
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v36
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v37
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v38
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v39
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v48
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v16, v15, v31
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v50
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v31, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v53
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v54
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v16
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v34, v18, v2, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v35, v19, v3, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT: v_cndmask_b32_e32 v36, v20, v4, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT: v_cndmask_b32_e32 v37, v21, v5, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e32 v38, v22, v6, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT: v_cndmask_b32_e32 v39, v23, v7, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT: v_cndmask_b32_e32 v48, v24, v8, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT: v_cndmask_b32_e32 v49, v25, v9, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT: v_cndmask_b32_e32 v50, v26, v10, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v51, v27, v11, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT: v_cndmask_b32_e32 v52, v28, v12, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT: v_cndmask_b32_e32 v53, v29, v13, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT: v_cndmask_b32_e32 v54, v30, v14, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v32, v0, v16 :: v_dual_min_f32 v33, v1, v17
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v32, 0x7fc00000, v32, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v34, v2, v18 :: v_dual_min_f32 v35, v3, v19
+; GFX11-NEXT: v_dual_min_f32 v36, v4, v20 :: v_dual_min_f32 v37, v5, v21
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v33, 0x7fc00000, v33, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v54, v14, v30
+; GFX11-NEXT: v_dual_min_f32 v38, v6, v22 :: v_dual_min_f32 v39, v7, v23
+; GFX11-NEXT: v_dual_min_f32 v48, v8, v24 :: v_dual_min_f32 v49, v9, v25
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v34, 0x7fc00000, v34, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v50, v10, v26 :: v_dual_min_f32 v51, v11, v27
+; GFX11-NEXT: v_dual_min_f32 v52, v12, v28 :: v_dual_min_f32 v53, v13, v29
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v35, 0x7fc00000, v35, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT: v_cndmask_b32_e32 v36, 0x7fc00000, v36, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT: v_cndmask_b32_e32 v37, 0x7fc00000, v37, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e32 v38, 0x7fc00000, v38, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT: v_cndmask_b32_e32 v39, 0x7fc00000, v39, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT: v_cndmask_b32_e32 v48, 0x7fc00000, v48, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT: v_cndmask_b32_e32 v49, 0x7fc00000, v49, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT: v_cndmask_b32_e32 v50, 0x7fc00000, v50, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v51, 0x7fc00000, v51, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT: v_cndmask_b32_e32 v52, 0x7fc00000, v52, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT: v_cndmask_b32_e32 v53, 0x7fc00000, v53, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT: v_cndmask_b32_e32 v54, 0x7fc00000, v54, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v16, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v17, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v18, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v19, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v20, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v21, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v22, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v23, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v24, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v25, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v26, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v27, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v28, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v29, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v30, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v33
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v35
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v36
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v37
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v38
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v39
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v48
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v16, v15, v31
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v50
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v31, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v53
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v54
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v16f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index 7013c60..37fe2e9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -530,221 +530,86 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX7-LABEL: v_minimum_v2f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX7-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX7-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX8-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX8-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX940-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[4:5], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v10, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v6, v2, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v10, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v12, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v13, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s7, 0, v[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s8, 0, v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s8
+; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[6:7]
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v6, v2, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v8, s1
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v10, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v13, s2
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[6:7], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[10:11]
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v12, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s3, 0, v[8:9]
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, v0, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, v1, s3
+; GFX11-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64:
@@ -765,182 +630,43 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX7-LABEL: v_minimum_v2f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[4:5], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 32
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[0:1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[2:3]
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v6, v2, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s7, 0, v[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s8, 0, v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s8
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[6:7], 32
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v5, v1 :: v_dual_cndmask_b32 v8, v4, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v6, v2, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s3, 0, v[8:9]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, v0, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, v1, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nnan:
@@ -961,111 +687,86 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX7-LABEL: v_minimum_v2f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX940-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v6, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s6
+; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v8, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX11-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v8, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v9, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nsz:
@@ -1086,69 +787,43 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX7-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nnan_nsz:
@@ -1170,61 +845,20 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s10
+; GFX7-NEXT: v_mov_b32_e32 v4, s8
; GFX7-NEXT: v_mov_b32_e32 v1, s11
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 32
-; GFX7-NEXT: v_mov_b32_e32 v0, s8
-; GFX7-NEXT: v_mov_b32_e32 v1, s9
-; GFX7-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s16, s7, s11
-; GFX7-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX7-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s14, s6, s10
-; GFX7-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 32
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s14, s14, 0
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX7-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s7, s7, s15
-; GFX7-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX7-NEXT: s_cselect_b32 s7, s11, s7
-; GFX7-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX7-NEXT: s_cselect_b32 s7, s7, s15
-; GFX7-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX7-NEXT: s_cselect_b32 s6, s6, s14
-; GFX7-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX7-NEXT: s_cselect_b32 s6, s10, s6
-; GFX7-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX7-NEXT: s_cselect_b32 s6, s6, s14
-; GFX7-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s14, s5, s9
-; GFX7-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX7-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s10, s4, s8
-; GFX7-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 32
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 32
-; GFX7-NEXT: s_cselect_b32 s10, s10, 0
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX7-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX7-NEXT: s_cselect_b32 s5, s5, s11
-; GFX7-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s5, s9, s5
-; GFX7-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s5, s5, s11
-; GFX7-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX7-NEXT: s_cselect_b32 s4, s4, s10
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s4, s8, s4
-; GFX7-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s4, s4, s10
+; GFX7-NEXT: v_mov_b32_e32 v5, s9
+; GFX7-NEXT: v_min_f64 v[2:3], s[6:7], v[0:1]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX7-NEXT: v_min_f64 v[0:1], s[4:5], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX7-NEXT: ;;#ASMSTART
-; GFX7-NEXT: ; use s[4:7]
+; GFX7-NEXT: ; use v[0:3]
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -1232,61 +866,20 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s10
+; GFX8-NEXT: v_mov_b32_e32 v4, s8
; GFX8-NEXT: v_mov_b32_e32 v1, s11
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 32
-; GFX8-NEXT: v_mov_b32_e32 v0, s8
-; GFX8-NEXT: v_mov_b32_e32 v1, s9
-; GFX8-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s16, s7, s11
-; GFX8-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX8-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s14, s6, s10
-; GFX8-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 32
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s14, s14, 0
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s7, s7, s15
-; GFX8-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX8-NEXT: s_cselect_b32 s7, s11, s7
-; GFX8-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX8-NEXT: s_cselect_b32 s7, s7, s15
-; GFX8-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX8-NEXT: s_cselect_b32 s6, s6, s14
-; GFX8-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX8-NEXT: s_cselect_b32 s6, s10, s6
-; GFX8-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX8-NEXT: s_cselect_b32 s6, s6, s14
-; GFX8-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s14, s5, s9
-; GFX8-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX8-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s10, s4, s8
-; GFX8-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 32
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 32
-; GFX8-NEXT: s_cselect_b32 s10, s10, 0
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX8-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX8-NEXT: s_cselect_b32 s5, s5, s11
-; GFX8-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s5, s9, s5
-; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s5, s5, s11
-; GFX8-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s4, s8, s4
-; GFX8-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
+; GFX8-NEXT: v_mov_b32_e32 v5, s9
+; GFX8-NEXT: v_min_f64 v[2:3], s[6:7], v[0:1]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX8-NEXT: v_min_f64 v[0:1], s[4:5], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s[4:7]
+; GFX8-NEXT: ; use v[0:3]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -1294,61 +887,20 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s10
+; GFX9-NEXT: v_mov_b32_e32 v4, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s11
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 32
-; GFX9-NEXT: v_mov_b32_e32 v0, s8
-; GFX9-NEXT: v_mov_b32_e32 v1, s9
-; GFX9-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s16, s7, s11
-; GFX9-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX9-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s14, s6, s10
-; GFX9-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 32
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s14, s14, 0
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX9-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s7, s7, s15
-; GFX9-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX9-NEXT: s_cselect_b32 s7, s11, s7
-; GFX9-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX9-NEXT: s_cselect_b32 s7, s7, s15
-; GFX9-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX9-NEXT: s_cselect_b32 s6, s6, s14
-; GFX9-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX9-NEXT: s_cselect_b32 s6, s10, s6
-; GFX9-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX9-NEXT: s_cselect_b32 s6, s6, s14
-; GFX9-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s14, s5, s9
-; GFX9-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX9-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s10, s4, s8
-; GFX9-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 32
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 32
-; GFX9-NEXT: s_cselect_b32 s10, s10, 0
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX9-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX9-NEXT: s_cselect_b32 s5, s5, s11
-; GFX9-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s5, s9, s5
-; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s5, s5, s11
-; GFX9-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX9-NEXT: s_cselect_b32 s4, s4, s10
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s4, s8, s4
-; GFX9-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s4, s4, s10
+; GFX9-NEXT: v_mov_b32_e32 v5, s9
+; GFX9-NEXT: v_min_f64 v[2:3], s[6:7], v[0:1]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX9-NEXT: v_min_f64 v[0:1], s[4:5], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[4:7]
+; GFX9-NEXT: ; use v[0:3]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -1356,179 +908,52 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, s[2:3], v[0:1]
-; GFX940-NEXT: s_and_b64 s[8:9], vcc, exec
-; GFX940-NEXT: v_cmp_o_f64_e64 s[8:9], s[2:3], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s12, s3, s7
-; GFX940-NEXT: s_and_b64 s[10:11], s[8:9], exec
-; GFX940-NEXT: s_cselect_b32 s11, s12, 0x7ff80000
-; GFX940-NEXT: s_and_b64 s[12:13], vcc, exec
-; GFX940-NEXT: s_cselect_b32 s10, s2, s6
-; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[12:13], s[2:3], 32
-; GFX940-NEXT: s_cselect_b32 s10, s10, 0
-; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[14:15], s[6:7], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[8:9], s[10:11], 0
-; GFX940-NEXT: s_cselect_b32 s3, s3, s11
-; GFX940-NEXT: s_and_b64 s[16:17], s[14:15], exec
-; GFX940-NEXT: s_cselect_b32 s3, s7, s3
-; GFX940-NEXT: s_and_b64 s[16:17], s[8:9], exec
-; GFX940-NEXT: s_cselect_b32 s7, s3, s11
-; GFX940-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s11, s2, s10
-; GFX940-NEXT: s_and_b64 s[2:3], s[14:15], exec
+; GFX940-NEXT: v_min_f64 v[2:3], s[2:3], v[0:1]
+; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; GFX940-NEXT: s_cselect_b32 s6, s6, s11
-; GFX940-NEXT: s_and_b64 s[2:3], s[8:9], exec
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s6, s6, s10
-; GFX940-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX940-NEXT: v_cmp_o_f64_e64 s[2:3], s[0:1], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s10, s1, s5
-; GFX940-NEXT: s_and_b64 s[8:9], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s9, s10, 0x7ff80000
-; GFX940-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX940-NEXT: s_cselect_b32 s8, s0, s4
-; GFX940-NEXT: s_and_b64 s[2:3], s[2:3], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[10:11], s[0:1], 32
-; GFX940-NEXT: s_cselect_b32 s8, s8, 0
-; GFX940-NEXT: s_and_b64 s[12:13], s[10:11], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[12:13], s[4:5], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], s[8:9], 0
-; GFX940-NEXT: s_cselect_b32 s1, s1, s9
-; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s1, s5, s1
-; GFX940-NEXT: s_and_b64 s[14:15], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s5, s1, s9
-; GFX940-NEXT: s_and_b64 s[10:11], s[10:11], exec
-; GFX940-NEXT: s_cselect_b32 s9, s0, s8
-; GFX940-NEXT: s_and_b64 s[0:1], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s4, s4, s9
-; GFX940-NEXT: s_and_b64 s[0:1], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s4, s4, s8
+; GFX940-NEXT: v_min_f64 v[4:5], s[0:1], v[0:1]
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use s[4:7]
+; GFX940-NEXT: ; use v[0:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s12, s[6:7], s[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s14, s[6:7], s[10:11]
-; GFX10-NEXT: v_cmp_class_f64_e64 s15, s[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s16, s[10:11], 32
-; GFX10-NEXT: v_cmp_o_f64_e64 s18, s[4:5], s[8:9]
-; GFX10-NEXT: v_cmp_class_f64_e64 s19, s[4:5], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s20, s[8:9], 32
-; GFX10-NEXT: s_and_b32 s13, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s13, s7, s11
-; GFX10-NEXT: s_and_b32 s17, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s13, s13, 0x7ff80000
-; GFX10-NEXT: s_and_b32 s12, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s12, s6, s10
-; GFX10-NEXT: s_and_b32 s14, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s12, s12, 0
-; GFX10-NEXT: v_cmp_lt_f64_e64 s17, s[4:5], s[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, s[12:13], 0
-; GFX10-NEXT: s_and_b32 s21, s15, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s7, s13
-; GFX10-NEXT: s_and_b32 s21, s16, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s11, s7
-; GFX10-NEXT: s_and_b32 s11, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s7, s13
-; GFX10-NEXT: s_and_b32 s11, s15, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s6, s12
-; GFX10-NEXT: s_and_b32 s11, s16, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s10, s6
-; GFX10-NEXT: s_and_b32 s10, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s6, s12
-; GFX10-NEXT: s_and_b32 s10, s17, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s5, s9
-; GFX10-NEXT: s_and_b32 s11, s18, exec_lo
-; GFX10-NEXT: s_cselect_b32 s11, s10, 0x7ff80000
-; GFX10-NEXT: s_and_b32 s10, s17, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s4, s8
-; GFX10-NEXT: s_and_b32 s12, s18, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s10, 0
-; GFX10-NEXT: s_and_b32 s13, s19, exec_lo
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, s[10:11], 0
-; GFX10-NEXT: s_cselect_b32 s5, s5, s11
-; GFX10-NEXT: s_and_b32 s13, s20, exec_lo
-; GFX10-NEXT: s_cselect_b32 s5, s9, s5
-; GFX10-NEXT: s_and_b32 s9, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s5, s5, s11
-; GFX10-NEXT: s_and_b32 s9, s19, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s4, s10
-; GFX10-NEXT: s_and_b32 s9, s20, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s8, s4
-; GFX10-NEXT: s_and_b32 s8, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s4, s10
+; GFX10-NEXT: v_min_f64 v[0:1], s[6:7], s[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, s[6:7], s[10:11]
+; GFX10-NEXT: v_min_f64 v[4:5], s[4:5], s[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, s[4:5], s[8:9]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v0, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, s4
; GFX10-NEXT: ;;#ASMSTART
-; GFX10-NEXT: ; use s[4:7]
+; GFX10-NEXT: ; use v[0:3]
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_minimum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s8, s[2:3], s[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s10, s[2:3], s[6:7]
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, s[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s12, s[6:7], 32
-; GFX11-NEXT: v_cmp_o_f64_e64 s14, s[0:1], s[4:5]
-; GFX11-NEXT: v_cmp_class_f64_e64 s15, s[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, s[4:5], 32
-; GFX11-NEXT: s_and_b32 s9, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s9, s3, s7
-; GFX11-NEXT: s_and_b32 s13, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s9, s9, 0x7ff80000
-; GFX11-NEXT: s_and_b32 s8, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s8, s2, s6
-; GFX11-NEXT: s_and_b32 s10, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s8, s8, 0
-; GFX11-NEXT: v_cmp_lt_f64_e64 s13, s[0:1], s[4:5]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, s[8:9], 0
-; GFX11-NEXT: s_and_b32 s17, s11, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s3, s9
-; GFX11-NEXT: s_and_b32 s17, s12, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_and_b32 s7, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s3, s9
-; GFX11-NEXT: s_and_b32 s7, s11, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s2, s8
-; GFX11-NEXT: s_and_b32 s7, s12, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s6, s2
-; GFX11-NEXT: s_and_b32 s6, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s2, s8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: s_and_b32 s6, s13, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s1, s5
-; GFX11-NEXT: s_and_b32 s7, s14, exec_lo
-; GFX11-NEXT: s_cselect_b32 s7, s6, 0x7ff80000
-; GFX11-NEXT: s_and_b32 s6, s13, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s0, s4
-; GFX11-NEXT: s_and_b32 s8, s14, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s6, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: s_and_b32 s9, s15, exec_lo
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, s[6:7], 0
-; GFX11-NEXT: s_cselect_b32 s1, s1, s7
-; GFX11-NEXT: s_and_b32 s9, s16, exec_lo
-; GFX11-NEXT: s_cselect_b32 s1, s5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_and_b32 s5, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s1, s1, s7
-; GFX11-NEXT: s_and_b32 s5, s15, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s0, s6
-; GFX11-NEXT: s_and_b32 s5, s16, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s4, s0
-; GFX11-NEXT: s_and_b32 s4, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s0, s6
+; GFX11-NEXT: v_min_f64 v[0:1], s[2:3], s[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[6:7]
+; GFX11-NEXT: v_min_f64 v[4:5], s[0:1], s[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s[0:3]
+; GFX11-NEXT: ; use v[0:3]
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
@@ -1554,306 +979,110 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX7-LABEL: v_minimum_v3f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX7-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 32
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX7-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX8-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 32
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX8-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 32
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX9-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v11, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[2:3]
+; GFX940-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX940-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v10, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX940-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v10, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v12, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v14, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, v17, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, v18, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v19, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[8:9], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[12:13]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[14:15]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v8, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s11
+; GFX10-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v12, v7, v1 :: v_dual_cndmask_b32 v17, v6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v10, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v14, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v12, 0, v17, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v14, 0, v18, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v19, s4
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[10:11], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[12:13]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s6, 0, v[14:15]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v12, v0 :: v_dual_cndmask_b32 v1, v13, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v8, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v9, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, v0, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, v1, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s7
+; GFX11-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64:
@@ -1875,247 +1104,49 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX7-LABEL: v_minimum_v3f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[12:13]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 32
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1]
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v11, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v10, v4, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[2:3]
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[8:9], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v10, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[12:13]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[14:15]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v8, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s11
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[10:11], 32
-; GFX11-NEXT: v_dual_cndmask_b32 v13, v7, v1 :: v_dual_cndmask_b32 v12, v6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v10, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[12:13]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s6, 0, v[14:15]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v12, v0 :: v_dual_cndmask_b32 v1, v13, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v8, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v9, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, v0, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, v1, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s7
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nnan:
@@ -2137,144 +1168,110 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX7-LABEL: v_minimum_v3f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX7-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX8-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX9-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v10, v4, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc
+; GFX940-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX940-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v6, v5, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX940-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v12, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s8
+; GFX10-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v12, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s4
+; GFX11-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nsz:
@@ -2296,88 +1293,49 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX7-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v5, s5
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v5, s1
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nnan_nsz:
@@ -2399,404 +1357,135 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX7-LABEL: v_minimum_v4f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX7-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX7-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX8-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX8-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX9-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX9-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX9-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[2:3]
+; GFX940-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX940-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[12:13], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[14:15], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s6, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s8, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[14:15], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v19, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v13, v5, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v15, v7, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v19, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v18, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v12, v4, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, v21, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v14, v6, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[4:5], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v20, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, v23, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v22, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, v24, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s9, v[10:11], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s6, 0, v[16:17]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[18:19]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[20:21]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s14
+; GFX10-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s3, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v11, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v13, v5, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v15, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v18, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v20, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v10, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v12, v4, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v14, v6, s3
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v22, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v18, 0, v18, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v20, 0, v20, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v22, 0, v24, s6
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[12:13], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[14:15], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[10:11], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, 0, v[18:19]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[20:21]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, 0, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v16, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s10
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, v0, s7
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, v1, s7
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s8
+; GFX11-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64:
@@ -2819,320 +1508,55 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX7-LABEL: v_minimum_v4f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 32
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v13, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[12:13], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v15, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[14:15], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[2:3]
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX940-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[10:11], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[8:9], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[14:15], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v15, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v12, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[4:5], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v14, v6, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[2:3], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[16:17]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[18:19]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[20:21]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v21, v5, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s14
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[14:15], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[10:11], 32
-; GFX11-NEXT: v_dual_cndmask_b32 v17, v9, v1 :: v_dual_cndmask_b32 v16, v8, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v13, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v12, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v23, v15, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v14, v6, s2
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[12:13], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, 0, v[18:19]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[20:21]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, 0, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s4
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v16, v0 :: v_dual_cndmask_b32 v1, v17, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, v0, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, v1, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s10
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nnan:
@@ -3155,180 +1579,135 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX7-LABEL: v_minimum_v4f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX7-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX8-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX9-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX9-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v4, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
+; GFX940-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX940-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[6:7], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v12, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v14, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v16, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v10, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v7, s10
+; GFX10-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s3, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
-; GFX11-NEXT: v_dual_cndmask_b32 v16, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v12, v4, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v14, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v16, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v10, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v12, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v7, s6
+; GFX11-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nsz:
@@ -3351,108 +1730,55 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX7-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX940-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v12, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v14, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s6
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v12, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v14, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, v7, s2
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nnan_nsz:
@@ -3475,782 +1801,244 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX7-LABEL: v_minimum_v8f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 32
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX7-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX7-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX7-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX7-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX7-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX7-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX7-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX7-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v8f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 32
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX8-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX8-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX8-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX8-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX8-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX8-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX8-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX8-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v8f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 32
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX9-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX9-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX9-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX9-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX9-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX9-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX9-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v8f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX940-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v35, v32, v33, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v34, 0, v33, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[16:17], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v34, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v35, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[18:19]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v34, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v35, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[18:19], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v16, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[20:21], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[22:23]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[22:23], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v16, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[24:25], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[10:11], v[26:27]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[26:27], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v16, v10, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[28:29], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
+; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
+; GFX940-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX940-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19]
+; GFX940-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX940-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23]
+; GFX940-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25]
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX940-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27]
+; GFX940-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29]
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[30:31], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v15, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, v14, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v17, v15, s[2:3]
+; GFX940-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s9, v[6:7], v[22:23]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s10, v[8:9], v[24:25]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s11, v[10:11], v[26:27]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s12, v[12:13], v[28:29]
-; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[6:7], v[22:23]
-; GFX10-NEXT: v_cmp_o_f64_e64 s14, v[8:9], v[24:25]
-; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[10:11], v[26:27]
-; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[12:13], v[28:29]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[2:3], v[18:19]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[18:19]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[4:5], v[20:21]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[20:21]
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[26:27], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s18, v[28:29], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v17, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v23, v7, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v25, v9, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v27, v11, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v33, 0x7ff80000, v32, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v52, v29, v13, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v38, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v48, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v32, 0, v32, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v50, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v52, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v22, v6, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v24, v8, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v26, v10, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v52, v28, v12, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[16:17], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[18:19], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v19, v3, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v38, 0, v38, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v36, v21, v5, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 s9, v[12:13], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v48, 0, v48, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v35, 0x7ff80000, v34, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v18, v2, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v37, 0x7ff80000, v36, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v36, v20, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v34, 0, v34, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v36, 0, v36, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[8:9], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v34, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v50, 0, v50, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v52, 0, v52, s16
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[20:21], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v18, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s15, v[22:23], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s16, v[24:25], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s19, 0, v[32:33]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s20, 0, v[34:35]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s21, 0, v[36:37]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s22, 0, v[48:49]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s23, 0, v[50:51]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s24, 0, v[52:53]
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v52, v12, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v36, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v35, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v38, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v37, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, v8, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v50, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v39, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, v9, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v51, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v53, v13, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v20, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v26, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v22, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v24, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v28, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v19, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v21, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v23, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v25, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v27, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v29, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, v0, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v34, v2, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v36, v4, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, v8, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v50, v10, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v52, v12, s24
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, v1, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v35, v3, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v37, v5, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, v9, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v51, v11, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v53, v13, s24
+; GFX10-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
+; GFX10-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
+; GFX10-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
+; GFX10-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25]
+; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
+; GFX10-NEXT: v_min_f64 v[24:25], v[10:11], v[26:27]
+; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
+; GFX10-NEXT: v_min_f64 v[26:27], v[12:13], v[28:29]
+; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v16, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v20, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v22, 0, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v24, 0, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v12, v26, 0, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s10, v[14:15], v[30:31]
-; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[14:15], v[30:31]
-; GFX10-NEXT: v_cmp_class_f64_e64 s25, v[30:31], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v31, v15, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v30, v14, s10
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v16, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v54, 0, v18, s13
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[14:15], 32
-; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[54:55]
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v38, v6, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v39, v7, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v54, v14, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v55, v15, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v30, s25
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v31, s25
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v55, v15, vcc_lo
+; GFX10-NEXT: v_min_f64 v[28:29], v[14:15], v[30:31]
+; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
+; GFX10-NEXT: v_cndmask_b32_e64 v14, v28, 0, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v8f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_cmp_lt_f64_e64 s4, v[6:7], v[22:23]
-; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[6:7], v[22:23]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[2:3], v[18:19]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s6, v[10:11], v[26:27]
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[18:19]
-; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[10:11], v[26:27]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s3, v[4:5], v[20:21]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s5, v[8:9], v[24:25]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s7, v[12:13], v[28:29]
-; GFX11-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[20:21]
-; GFX11-NEXT: v_cmp_o_f64_e64 s10, v[8:9], v[24:25]
-; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[12:13], v[28:29]
-; GFX11-NEXT: v_cmp_class_f64_e64 s13, v[18:19], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s15, v[20:21], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v38, v23, v7, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v34, v19, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v50, v27, v11, s6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v38, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v38, v22, v6, s4
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[6:7], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v35, 0x7ff80000, v34, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v50, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v34, v18, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v50, v26, v10, s6
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[0:1], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v36, v21, v5, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v48, v25, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v52, v29, v13, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v50, 0, v50, s11
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, v[16:17], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v37, 0x7ff80000, v36, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v48, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v52, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v36, v20, v4, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v48, v24, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v52, v28, v12, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v34, 0, v34, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v38, 0, v38, s9
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[4:5], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s7, v[10:11], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s9, v[12:13], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v36, 0, v36, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v48, 0, v48, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v52, 0, v52, s12
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[24:25], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s8, v[26:27], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[28:29], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s14, 0, v[34:35]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s16, 0, v[36:37]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s17, 0, v[38:39]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s18, 0, v[48:49]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s20, 0, v[50:51]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s21, 0, v[52:53]
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v39, v7, s4
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v17, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v38, v6, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v33, 0x7ff80000, v32, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, v1, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v32, 0, v32, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s11
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, v0, s1
-; GFX11-NEXT: v_cmp_eq_f64_e64 s12, 0, v[32:33]
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v34, v2, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v36, v4, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v48, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v50, v10, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v52, v12, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v35, v3, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v37, v5, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v49, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v51, v11, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v53, v13, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v18, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v20, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v24, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v26, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v28, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v19, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v21, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v25, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v27, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v29, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v34, v2, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v36, v4, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v48, v8, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v50, v10, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v52, v12, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v35, v3, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v37, v5, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v49, v9, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v51, v11, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v53, v13, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, v0, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, v1, s12
+; GFX11-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
+; GFX11-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
+; GFX11-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
+; GFX11-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25]
+; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
+; GFX11-NEXT: v_min_f64 v[24:25], v[10:11], v[26:27]
+; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
+; GFX11-NEXT: v_min_f64 v[26:27], v[12:13], v[28:29]
+; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v16, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v18, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v20, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v22, 0, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v10, v24, 0, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v26, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[14:15], v[30:31]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[14:15], v[30:31]
-; GFX11-NEXT: v_cmp_class_f64_e64 s19, v[30:31], 32
-; GFX11-NEXT: v_cndmask_b32_e32 v54, v31, v15, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v54, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v54, 0, v16, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[14:15], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s22, 0, v[54:55]
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v23 :: v_dual_cndmask_b32 v6, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v54, v14, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v55, v15, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v38, v6, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v39, v7, s17
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v30, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v31, s19
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v54, v14, s22
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v55, v15, s22
+; GFX11-NEXT: v_min_f64 v[28:29], v[14:15], v[30:31]
+; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v14, v28, 0, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v8f64:
@@ -4279,1799 +2067,798 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-LABEL: v_minimum_v16f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX7-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX7-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX7-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX7-NEXT: v_writelane_b32 v34, s30, 0
+; GFX7-NEXT: v_writelane_b32 v34, s31, 1
+; GFX7-NEXT: v_writelane_b32 v34, s34, 2
+; GFX7-NEXT: v_writelane_b32 v34, s35, 3
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX7-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX7-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX7-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX7-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX7-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX7-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX7-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX7-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX7-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX7-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 32
-; GFX7-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX7-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX7-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX7-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX7-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX7-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX7-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX7-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX7-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX7-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX7-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX7-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 32
-; GFX7-NEXT: s_waitcnt vmcnt(7)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX7-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(5)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX7-NEXT: s_waitcnt vmcnt(3)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX7-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX7-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
+; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX7-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX7-NEXT: v_readlane_b32 s35, v34, 3
+; GFX7-NEXT: v_readlane_b32 s34, v34, 2
+; GFX7-NEXT: v_readlane_b32 s31, v34, 1
+; GFX7-NEXT: v_readlane_b32 s30, v34, 0
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v16f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX8-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX8-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX8-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX8-NEXT: s_waitcnt vmcnt(2)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX8-NEXT: v_writelane_b32 v34, s30, 0
+; GFX8-NEXT: v_writelane_b32 v34, s31, 1
+; GFX8-NEXT: v_writelane_b32 v34, s34, 2
+; GFX8-NEXT: v_writelane_b32 v34, s35, 3
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX8-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX8-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX8-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX8-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX8-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX8-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX8-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX8-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX8-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX8-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 32
-; GFX8-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX8-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX8-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX8-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX8-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX8-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX8-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX8-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX8-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 32
-; GFX8-NEXT: s_waitcnt vmcnt(7)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX8-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(5)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX8-NEXT: s_waitcnt vmcnt(3)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX8-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX8-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
+; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX8-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX8-NEXT: v_readlane_b32 s35, v34, 3
+; GFX8-NEXT: v_readlane_b32 s34, v34, 2
+; GFX8-NEXT: v_readlane_b32 s31, v34, 1
+; GFX8-NEXT: v_readlane_b32 s30, v34, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v16f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX9-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(2)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX9-NEXT: v_writelane_b32 v34, s30, 0
+; GFX9-NEXT: v_writelane_b32 v34, s31, 1
+; GFX9-NEXT: v_writelane_b32 v34, s34, 2
+; GFX9-NEXT: v_writelane_b32 v34, s35, 3
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX9-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX9-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX9-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX9-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX9-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX9-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX9-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 32
-; GFX9-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX9-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX9-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX9-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX9-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX9-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX9-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 32
-; GFX9-NEXT: s_waitcnt vmcnt(7)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX9-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(5)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX9-NEXT: s_waitcnt vmcnt(3)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX9-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX9-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
+; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX9-NEXT: v_readlane_b32 s35, v34, 3
+; GFX9-NEXT: v_readlane_b32 s34, v34, 2
+; GFX9-NEXT: v_readlane_b32 s31, v34, 1
+; GFX9-NEXT: v_readlane_b32 s30, v34, 0
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v16f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:8
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:4
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:16
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:12
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:24
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:20
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:32
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:28
+; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
+; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
+; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
+; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
+; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
+; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
+; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
+; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
+; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
+; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
+; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
+; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
+; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
+; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
+; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
+; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
+; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
+; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
+; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
+; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
+; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
+; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:128
-; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:124
-; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:120
-; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:116
-; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:40
-; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:36
+; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
+; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
+; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_min_f64 v[58:59], v[2:3], v[36:37]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:104
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:100
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:96
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:92
-; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:56
-; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:52
-; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:48
-; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:44
-; GFX940-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX940-NEXT: v_mov_b32_e32 v56, 0x7ff80000
-; GFX940-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_min_f64 v[60:61], v[4:5], v[38:39]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
+; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
+; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_min_f64 v[62:63], v[6:7], v[48:49]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
+; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
+; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_min_f64 v[2:3], v[0:1], v[56:57]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
; GFX940-NEXT: s_waitcnt vmcnt(23)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[40:41]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v41, v1, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[40:41]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v40, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v57, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[40:41], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v58, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v59, v1, vcc
+; GFX940-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47]
+; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
+; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
+; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v41, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v51, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e32 v40, v50, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cndmask_b32_e64 v61, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v60, 0, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v60, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v61, v3, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[50:51], 32
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v50, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v51, vcc
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[44:45]
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v58, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v59, v1, vcc
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[60:61]
+; GFX940-NEXT: v_min_f64 v[46:47], v[10:11], v[44:45]
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
+; GFX940-NEXT: s_waitcnt vmcnt(19)
+; GFX940-NEXT: v_min_f64 v[44:45], v[12:13], v[42:43]
+; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
+; GFX940-NEXT: s_waitcnt vmcnt(17)
+; GFX940-NEXT: v_min_f64 v[42:43], v[14:15], v[40:41]
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX940-NEXT: s_waitcnt vmcnt(15)
+; GFX940-NEXT: v_min_f64 v[40:41], v[16:17], v[54:55]
+; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX940-NEXT: s_waitcnt vmcnt(13)
+; GFX940-NEXT: v_min_f64 v[54:55], v[18:19], v[52:53]
+; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX940-NEXT: s_waitcnt vmcnt(11)
+; GFX940-NEXT: v_min_f64 v[52:53], v[20:21], v[50:51]
+; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX940-NEXT: s_waitcnt vmcnt(9)
+; GFX940-NEXT: v_min_f64 v[50:51], v[22:23], v[34:35]
+; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
+; GFX940-NEXT: s_waitcnt vmcnt(6)
+; GFX940-NEXT: v_min_f64 v[34:35], v[24:25], v[32:33]
+; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
+; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(4)
+; GFX940-NEXT: v_min_f64 v[32:33], v[26:27], v[36:37]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v60, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v61, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[44:45]
-; GFX940-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v45, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v44, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[44:45], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v58, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v59, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v44, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v45, s[0:1]
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:72
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:68
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(22)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v58, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v59, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[46:47]
+; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
+; GFX940-NEXT: s_waitcnt vmcnt(2)
+; GFX940-NEXT: v_min_f64 v[32:33], v[28:29], v[38:39]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v47, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v46, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[46:47], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v58, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v59, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v46, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v47, s[0:1]
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:80
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:76
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(18)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[8:9], v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v58, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v59, v7, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[8:9], v[42:43]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v43, v9, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v42, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v57, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[42:43], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[58:59]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v58, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v59, v9, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(8)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v8, v42, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v9, v43, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v42, v55, v11, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[10:11], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v58, v8, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v59, v9, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v43, v56, v42, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v42, v54, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v42, 0, v42, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[54:55], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v42, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v43, v11, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[12:13], v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v10, v54, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v11, v55, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v54, v53, v13, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[12:13], v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v42, v10, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v43, v11, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v55, v56, v54, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v54, v52, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v54, 0, v54, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[52:53], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v54, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v55, v13, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(6)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v12, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v13, v53, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v41, v15, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v54, v12, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v55, v13, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v40, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[40:41], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v52, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v14, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v53, v15, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(3)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[16:17], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v52, v14, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v15, v41, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v45, v17, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[16:17], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v53, v15, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v44, v16, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[44:45], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v52, v16, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v16, v44, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v53, v17, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(1)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[18:19], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v52, v16, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v17, v45, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v47, v19, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[18:19], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v53, v17, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v46, v18, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[46:47], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v18, v52, v18, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v18, v46, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v19, v53, v19, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[20:21], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v52, v18, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v19, v19, v47, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v51, v21, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[20:21], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v19, v53, v19, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v50, v20, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[20:21], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[50:51], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v20, v52, v20, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v21, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[22:23], v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v20, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v21, v21, v51, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v49, v23, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[22:23], v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, v20, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v21, v53, v21, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v51, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v48, v22, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v50, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[48:49], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e32 v22, v50, v22, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v23, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[24:25], v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v22, v48, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v23, v23, v49, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v48, v39, v25, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[24:25], v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, v22, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v23, v51, v23, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v49, v56, v48, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v48, v38, v24, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[38:39], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e32 v24, v48, v24, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v25, v49, v25, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[26:27], v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v24, v38, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v25, v25, v39, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v38, v37, v27, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[26:27], v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v48, v24, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v25, v49, v25, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v39, v56, v38, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v38, v36, v26, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v38, 0, v38, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[36:37], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e32 v26, v38, v26, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v27, v39, v27, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[28:29], v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v26, v36, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v27, v27, v37, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v36, v35, v29, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[28:29], v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v38, v26, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v27, v39, v27, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v37, v56, v36, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v36, v34, v28, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[28:29], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[34:35], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e32 v28, v36, v28, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v29, v37, v29, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v28, v34, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v29, v29, v35, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v34, v33, v31, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[30:31], v[32:33]
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v36, v28, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v29, v37, v29, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v35, v56, v34, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v34, v32, v30, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[32:33], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e32 v30, v34, v30, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v31, v35, v31, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v34, v30, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v31, v35, v31, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX940-NEXT: v_min_f64 v[32:33], v[30:31], v[48:49]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
+; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x20
+; GFX10-NEXT: s_clause 0x19
+; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24
+; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
+; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:32
+; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:28
+; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
+; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:68
+; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:64
+; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:60
+; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:48
+; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:44
+; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
; GFX10-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:8
; GFX10-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:4
-; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:16
-; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:12
-; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:24
-; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:20
-; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:32
-; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:28
-; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:36
-; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:40
-; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:56
-; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:52
-; GFX10-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
-; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
-; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
-; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
-; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:68
-; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:72
-; GFX10-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:80
-; GFX10-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:76
-; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:88
-; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:84
+; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:100
; GFX10-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:96
; GFX10-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:92
-; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:100
-; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:104
-; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:112
-; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:108
-; GFX10-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:120
-; GFX10-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:116
+; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:88
+; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:84
+; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:80
+; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:76
+; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:72
+; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:104
+; GFX10-NEXT: s_waitcnt vmcnt(24)
+; GFX10-NEXT: v_min_f64 v[82:83], v[2:3], v[31:32]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[2:3], v[31:32]
+; GFX10-NEXT: s_waitcnt vmcnt(22)
+; GFX10-NEXT: v_min_f64 v[84:85], v[4:5], v[33:34]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[4:5], v[33:34]
+; GFX10-NEXT: s_clause 0x3
+; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120
+; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:116
+; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:112
+; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:108
+; GFX10-NEXT: s_waitcnt vmcnt(24)
+; GFX10-NEXT: v_min_f64 v[32:33], v[6:7], v[35:36]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[6:7], v[35:36]
+; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:128
-; GFX10-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:124
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[2:3], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[10:11], 32
-; GFX10-NEXT: s_waitcnt vmcnt(31)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[64:65]
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[0:1], v[64:65]
-; GFX10-NEXT: s_waitcnt vmcnt(29)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[2:3], v[54:55]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[54:55]
-; GFX10-NEXT: s_waitcnt vmcnt(27)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[4:5], v[52:53]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[52:53]
-; GFX10-NEXT: s_waitcnt vmcnt(25)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s9, v[6:7], v[50:51]
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[6:7], v[50:51]
+; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:128
+; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:124
; GFX10-NEXT: s_waitcnt vmcnt(23)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s13, v[8:9], v[48:49]
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[64:65], 32
+; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[50:51]
; GFX10-NEXT: s_waitcnt vmcnt(21)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s15, v[12:13], v[36:37]
-; GFX10-NEXT: s_waitcnt vmcnt(17)
-; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[14:15], v[34:35]
-; GFX10-NEXT: v_cndmask_b32_e32 v96, v64, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v97, v54, v2, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v99, v55, v3, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v100, v52, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v96, 0, v96, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v101, v50, v6, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v98, 0, v97, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v97, v65, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[54:55], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v96, v0, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v99, 0x7ff80000, v99, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v98, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v97, 0x7ff80000, v97, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v100, 0, v100, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v102, 0, v101, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v99, v3, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[6:7], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v97, v1, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[4:5], 32
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[10:11], v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v48, v8, s13
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[12:13], v[36:37]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s6, v[14:15], v[34:35]
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v64, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v65, s14
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[52:53], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v113, v36, v12, s15
+; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[52:53]
+; GFX10-NEXT: s_waitcnt vmcnt(19)
+; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[10:11], v[54:55]
+; GFX10-NEXT: s_waitcnt vmcnt(18)
+; GFX10-NEXT: v_min_f64 v[34:35], v[8:9], v[37:38]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[8:9], v[37:38]
+; GFX10-NEXT: s_waitcnt vmcnt(16)
+; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[64:65]
+; GFX10-NEXT: v_min_f64 v[36:37], v[10:11], v[54:55]
+; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[0:1], v[64:65]
+; GFX10-NEXT: v_min_f64 v[38:39], v[12:13], v[52:53]
+; GFX10-NEXT: v_min_f64 v[52:53], v[14:15], v[50:51]
+; GFX10-NEXT: s_waitcnt vmcnt(11)
+; GFX10-NEXT: v_min_f64 v[54:55], v[20:21], v[70:71]
+; GFX10-NEXT: v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
+; GFX10-NEXT: s_waitcnt vmcnt(9)
+; GFX10-NEXT: v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
+; GFX10-NEXT: s_waitcnt vmcnt(8)
+; GFX10-NEXT: v_min_f64 v[50:51], v[16:17], v[48:49]
+; GFX10-NEXT: v_cmp_u_f64_e64 s11, v[16:17], v[48:49]
+; GFX10-NEXT: v_min_f64 v[48:49], v[18:19], v[80:81]
+; GFX10-NEXT: v_min_f64 v[64:65], v[22:23], v[68:69]
+; GFX10-NEXT: v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
+; GFX10-NEXT: s_waitcnt vmcnt(7)
+; GFX10-NEXT: v_min_f64 v[68:69], v[24:25], v[66:67]
+; GFX10-NEXT: v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v36, 0, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v34, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v35, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v11, v37, 0x7ff80000, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v12, v38, 0, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, 0, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v16, v50, 0, s11
+; GFX10-NEXT: v_cndmask_b32_e64 v17, v51, 0x7ff80000, s11
+; GFX10-NEXT: v_cndmask_b32_e64 v18, v48, 0, s12
+; GFX10-NEXT: v_cndmask_b32_e64 v19, v49, 0x7ff80000, s12
+; GFX10-NEXT: v_cndmask_b32_e64 v20, v54, 0, s13
+; GFX10-NEXT: v_cndmask_b32_e64 v21, v55, 0x7ff80000, s13
+; GFX10-NEXT: v_cndmask_b32_e64 v22, v64, 0, s14
+; GFX10-NEXT: v_cndmask_b32_e64 v23, v65, 0x7ff80000, s14
+; GFX10-NEXT: v_cndmask_b32_e64 v24, v68, 0, s15
+; GFX10-NEXT: v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
+; GFX10-NEXT: s_waitcnt vmcnt(5)
+; GFX10-NEXT: v_min_f64 v[70:71], v[28:29], v[2:3]
+; GFX10-NEXT: v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(3)
+; GFX10-NEXT: v_min_f64 v[66:67], v[26:27], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e64 s16, v[26:27], v[4:5]
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v82, 0, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_o_f64_e64 s18, v[30:31], v[86:87]
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v54, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v54, v53, v5, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v55, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[50:51], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v55, v51, v7, s9
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[8:9], v[48:49]
-; GFX10-NEXT: v_cndmask_b32_e64 v101, 0x7ff80000, v54, s8
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[16:17], v[32:33]
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v102, v6, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v103, 0x7ff80000, v55, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v100, v4, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v101, v5, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[8:9], 32
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[10:11], v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v103, v7, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[48:49], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v38, v10, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v115, v34, v14, s6
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[16:17], v[32:33]
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v52, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v53, s14
-; GFX10-NEXT: v_cmp_lt_f64_e64 s14, v[18:19], v[82:83]
-; GFX10-NEXT: v_cndmask_b32_e64 v52, 0, v115, s16
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v50, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v49, v9, s13
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v51, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[38:39], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v54, 0, v112, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v51, v39, v11, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v50, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v50, 0, v113, s5
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[18:19], v[82:83]
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v54, v8, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v64, 0, v114, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v55, v9, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v65, 0x7ff80000, v51, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v48, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v64, v10, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v49, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[14:15], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v65, v11, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v37, v13, s15
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[34:35], 32
-; GFX10-NEXT: v_cmp_lt_f64_e64 s9, v[20:21], v[66:67]
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[20:21], v[66:67]
-; GFX10-NEXT: v_cndmask_b32_e64 v116, v32, v16, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v48, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v38, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v39, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[22:23], v[68:69]
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v35, v15, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v49, v82, v18, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v48, 0, v116, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[36:37], 32
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[22:23], v[68:69]
-; GFX10-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v38, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v50, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v51, v13, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[16:17], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v38, 0, v49, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v83, v19, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, v14, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, v15, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[32:33], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[18:19], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v67, v21, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v34, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v35, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v33, v17, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v112, s4
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[24:25], v[70:71]
-; GFX10-NEXT: v_cndmask_b32_e32 v113, v69, v23, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v35, v68, v22, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[20:21], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v34, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v34, 0x7ff80000, v114, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v36, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v37, s13
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[82:83], 32
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[24:25], v[70:71]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v48, v16, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v49, v17, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v36, 0x7ff80000, v113, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v35, 0, v35, s5
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[26:27], v[80:81]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v32, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v66, v20, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v33, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v38, v18, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v39, v19, s14
-; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[26:27], v[80:81]
-; GFX10-NEXT: v_cndmask_b32_e64 v33, 0, v32, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v71, v25, s4
-; GFX10-NEXT: v_cmp_lt_f64_e64 s16, v[28:29], v[84:85]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[28:29], v[84:85]
-; GFX10-NEXT: v_cndmask_b32_e32 v21, v34, v21, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v20, v33, v20, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 32
-; GFX10-NEXT: v_cmp_lt_f64_e64 s17, v[30:31], v[86:87]
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[70:71], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v82, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v82, v70, v24, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v19, v83, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v83, 0x7ff80000, v112, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[68:69], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[96:97]
-; GFX10-NEXT: v_cndmask_b32_e64 v82, 0, v82, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v37, v81, v27, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v80, v26, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[80:81], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[84:85], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[98:99]
-; GFX10-NEXT: v_cndmask_b32_e64 v113, 0x7ff80000, v37, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v112, 0, v32, s15
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[100:101]
-; GFX10-NEXT: v_cndmask_b32_e64 v115, v85, v29, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v84, v28, s16
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[102:103]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[54:55]
-; GFX10-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[24:25], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v115, 0x7ff80000, v115, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v114, 0, v114, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v116, v87, v31, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v86, v30, s17
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[86:87], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[64:65]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s15, 0, v[50:51]
-; GFX10-NEXT: v_cndmask_b32_e64 v117, 0x7ff80000, v116, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v116, 0, v32, s18
-; GFX10-NEXT: v_cmp_eq_f64_e64 s16, 0, v[52:53]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s17, 0, v[48:49]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s18, 0, v[38:39]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s19, 0, v[33:34]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s20, 0, v[35:36]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s21, 0, v[82:83]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s22, 0, v[112:113]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s23, 0, v[114:115]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s24, 0, v[116:117]
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v22, v68, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v23, v69, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v96, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e32 v24, v82, v24, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v25, v83, v25, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[26:27], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v98, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v100, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v24, v70, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v25, v25, v71, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v102, v6, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v54, v8, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v64, v10, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v50, v12, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, v14, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v48, v16, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v38, v18, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v35, v22, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v82, v24, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v97, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v99, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v101, v5, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v103, v7, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v55, v9, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v65, v11, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v51, v13, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, v15, s16
-; GFX10-NEXT: v_cndmask_b32_e32 v26, v112, v26, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v27, v113, v27, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[28:29], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v49, v17, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v39, v19, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v26, v26, v80, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v27, v27, v81, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v36, v23, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v25, v83, v25, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v26, v112, v26, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v27, v113, v27, s22
-; GFX10-NEXT: v_cndmask_b32_e32 v28, v114, v28, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v29, v115, v29, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[30:31], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v28, v28, v84, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v29, v29, v85, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v28, v114, v28, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v29, v115, v29, s23
-; GFX10-NEXT: v_cndmask_b32_e32 v30, v116, v30, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v31, v117, v31, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[66:67], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v30, v30, v86, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v31, v31, v87, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v30, v116, v30, s24
-; GFX10-NEXT: v_cndmask_b32_e64 v31, v117, v31, s24
-; GFX10-NEXT: v_cndmask_b32_e32 v20, v20, v66, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v21, v21, v67, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v33, v20, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v34, v21, s19
+; GFX10-NEXT: v_min_f64 v[80:81], v[30:31], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s18, v[30:31], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v83, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v84, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v85, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v32, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v33, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v28, v70, 0, s17
+; GFX10-NEXT: v_cndmask_b32_e64 v29, v71, 0x7ff80000, s17
+; GFX10-NEXT: v_cndmask_b32_e64 v26, v66, 0, s16
+; GFX10-NEXT: v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
+; GFX10-NEXT: v_cndmask_b32_e64 v30, v80, 0, s18
+; GFX10-NEXT: v_cndmask_b32_e64 v31, v81, 0x7ff80000, s18
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v16f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1f
-; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:8
-; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:4
-; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:16
-; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:12
-; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24
-; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
-; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
-; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28
-; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:40
-; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:36
-; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:48
-; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:44
-; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:56
-; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:52
-; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:64
-; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:60
-; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:72
-; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:68
-; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:80
-; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:76
-; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:88
-; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:84
-; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:96
-; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:92
-; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:104
-; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:100
-; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:112
-; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:108
-; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:120
-; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:116
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:128
-; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:124
-; GFX11-NEXT: s_waitcnt vmcnt(31)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s9, v[0:1], v[86:87]
-; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[0:1], v[86:87]
-; GFX11-NEXT: s_waitcnt vmcnt(29)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s10, v[2:3], v[84:85]
-; GFX11-NEXT: v_cmp_class_f64_e64 s14, v[86:87], 32
-; GFX11-NEXT: s_waitcnt vmcnt(27)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[4:5], v[32:33]
-; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[4:5], v[32:33]
-; GFX11-NEXT: s_waitcnt vmcnt(25)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[6:7], v[34:35]
-; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[2:3], v[84:85]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[6:7], v[34:35]
-; GFX11-NEXT: s_waitcnt vmcnt(23)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s4, v[8:9], v[36:37]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[8:9], v[36:37]
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, v[84:85], 32
-; GFX11-NEXT: s_waitcnt vmcnt(21)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s6, v[10:11], v[38:39]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[10:11], v[38:39]
-; GFX11-NEXT: s_waitcnt vmcnt(19)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s8, v[12:13], v[48:49]
-; GFX11-NEXT: v_cmp_o_f64_e64 s7, v[12:13], v[48:49]
-; GFX11-NEXT: s_waitcnt vmcnt(17)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s13, v[14:15], v[50:51]
-; GFX11-NEXT: s_waitcnt vmcnt(15)
-; GFX11-NEXT: v_cmp_o_f64_e64 s15, v[16:17], v[52:53]
-; GFX11-NEXT: s_waitcnt vmcnt(13)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s17, v[18:19], v[54:55]
-; GFX11-NEXT: v_cmp_o_f64_e64 s18, v[18:19], v[54:55]
-; GFX11-NEXT: s_waitcnt vmcnt(11)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s19, v[20:21], v[64:65]
-; GFX11-NEXT: v_cmp_o_f64_e64 s20, v[20:21], v[64:65]
-; GFX11-NEXT: s_waitcnt vmcnt(9)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s21, v[22:23], v[66:67]
-; GFX11-NEXT: v_cmp_o_f64_e64 s22, v[22:23], v[66:67]
-; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s23, v[24:25], v[68:69]
-; GFX11-NEXT: v_cmp_o_f64_e64 s24, v[24:25], v[68:69]
-; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s25, v[26:27], v[70:71]
-; GFX11-NEXT: v_cmp_o_f64_e64 s26, v[26:27], v[70:71]
-; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s27, v[28:29], v[80:81]
-; GFX11-NEXT: v_cmp_o_f64_e64 s28, v[28:29], v[80:81]
+; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
+; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
+; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
+; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
+; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
+; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
+; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40
+; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36
+; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48
+; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44
+; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56
+; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52
+; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64
+; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60
+; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:72
+; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:68
+; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:80
+; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:76
+; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:88
+; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:84
+; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:96
+; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:92
+; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:104
+; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:100
+; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:112
+; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:108
+; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:120
+; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:116
+; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:128
+; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:124
+; GFX11-NEXT: s_waitcnt vmcnt(30)
+; GFX11-NEXT: v_min_f64 v[96:97], v[0:1], v[32:33]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
+; GFX11-NEXT: s_waitcnt vmcnt(28)
+; GFX11-NEXT: v_min_f64 v[32:33], v[2:3], v[34:35]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
+; GFX11-NEXT: s_waitcnt vmcnt(26)
+; GFX11-NEXT: v_min_f64 v[34:35], v[4:5], v[36:37]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
+; GFX11-NEXT: s_waitcnt vmcnt(24)
+; GFX11-NEXT: v_min_f64 v[36:37], v[6:7], v[38:39]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
+; GFX11-NEXT: s_waitcnt vmcnt(22)
+; GFX11-NEXT: v_min_f64 v[38:39], v[8:9], v[48:49]
+; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
+; GFX11-NEXT: s_waitcnt vmcnt(20)
+; GFX11-NEXT: v_min_f64 v[48:49], v[10:11], v[50:51]
+; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
+; GFX11-NEXT: s_waitcnt vmcnt(18)
+; GFX11-NEXT: v_min_f64 v[50:51], v[12:13], v[52:53]
+; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
+; GFX11-NEXT: s_waitcnt vmcnt(16)
+; GFX11-NEXT: v_min_f64 v[52:53], v[14:15], v[54:55]
+; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
+; GFX11-NEXT: s_waitcnt vmcnt(14)
+; GFX11-NEXT: v_min_f64 v[54:55], v[16:17], v[64:65]
+; GFX11-NEXT: v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
+; GFX11-NEXT: s_waitcnt vmcnt(12)
+; GFX11-NEXT: v_min_f64 v[64:65], v[18:19], v[66:67]
+; GFX11-NEXT: v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
+; GFX11-NEXT: s_waitcnt vmcnt(10)
+; GFX11-NEXT: v_min_f64 v[66:67], v[20:21], v[68:69]
+; GFX11-NEXT: v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
+; GFX11-NEXT: s_waitcnt vmcnt(8)
+; GFX11-NEXT: v_min_f64 v[68:69], v[22:23], v[70:71]
+; GFX11-NEXT: v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
+; GFX11-NEXT: s_waitcnt vmcnt(6)
+; GFX11-NEXT: v_min_f64 v[70:71], v[24:25], v[80:81]
+; GFX11-NEXT: v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
+; GFX11-NEXT: s_waitcnt vmcnt(4)
+; GFX11-NEXT: v_min_f64 v[80:81], v[26:27], v[82:83]
+; GFX11-NEXT: v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
+; GFX11-NEXT: s_waitcnt vmcnt(2)
+; GFX11-NEXT: v_min_f64 v[82:83], v[28:29], v[84:85]
+; GFX11-NEXT: v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s29, v[30:31], v[82:83]
-; GFX11-NEXT: v_cmp_o_f64_e64 vcc_hi, v[30:31], v[82:83]
-; GFX11-NEXT: v_cndmask_b32_e64 v96, v87, v1, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v101, v86, v0, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v98, v85, v3, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v103, v84, v2, s10
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[0:1], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v97, 0x7ff80000, v96, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v96, 0, v101, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v100, v33, v5, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v102, v35, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v99, 0x7ff80000, v98, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v98, 0, v103, s12
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, v[2:3], 32
-; GFX11-NEXT: v_cndmask_b32_e32 v101, 0x7ff80000, v100, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v103, 0x7ff80000, v102, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v112, v37, v9, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v114, v39, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v116, v49, v13, s8
-; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[14:15], v[50:51]
-; GFX11-NEXT: v_cndmask_b32_e64 v118, v51, v15, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v113, 0x7ff80000, v112, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v115, 0x7ff80000, v114, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v117, 0x7ff80000, v116, s7
-; GFX11-NEXT: v_cmp_lt_f64_e64 s12, v[16:17], v[52:53]
-; GFX11-NEXT: v_cndmask_b32_e64 v130, v55, v19, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v132, v65, v21, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v134, v67, v23, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v144, v69, v25, s23
-; GFX11-NEXT: v_cndmask_b32_e64 v145, v71, v27, s25
-; GFX11-NEXT: v_cndmask_b32_e64 v131, 0x7ff80000, v130, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v133, 0x7ff80000, v132, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v135, 0x7ff80000, v134, s22
-; GFX11-NEXT: v_cndmask_b32_e64 v146, v81, v29, s27
-; GFX11-NEXT: v_cndmask_b32_e64 v148, v80, v28, s27
-; GFX11-NEXT: v_cndmask_b32_e64 v147, v83, v31, s29
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v147, 0x7ff80000, v147, vcc_hi
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, v0, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, v1, s10
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[36:37], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v86, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v32, v4, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v87, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v34, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v98, v2, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v99, v3, s11
-; GFX11-NEXT: v_cndmask_b32_e32 v100, 0, v86, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[4:5], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v102, 0, v87, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v84, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v36, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v38, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v48, v12, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v119, 0x7ff80000, v118, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v128, v53, v17, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v112, 0, v84, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v114, 0, v86, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v116, 0, v87, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v50, v14, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v129, 0x7ff80000, v128, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v52, v16, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v54, v18, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v85, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v118, 0, v84, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v64, v20, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v128, 0, v86, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v130, 0, v87, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v66, v22, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v85, 0x7ff80000, v144, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v132, 0, v84, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v68, v24, s23
-; GFX11-NEXT: v_cndmask_b32_e64 v144, v70, v26, s25
-; GFX11-NEXT: v_cndmask_b32_e64 v134, 0, v86, s22
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[68:69], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[70:71], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v84, 0, v87, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v87, 0x7ff80000, v145, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v86, 0, v144, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v145, 0x7ff80000, v146, s28
-; GFX11-NEXT: v_cndmask_b32_e64 v144, 0, v148, s28
-; GFX11-NEXT: v_cndmask_b32_e64 v146, v82, v30, s29
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[80:81], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[82:83], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[32:33], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s8, v[34:35], 32
-; GFX11-NEXT: v_dual_cndmask_b32 v5, v101, v5 :: v_dual_cndmask_b32 v4, v100, v4
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[6:7], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v146, 0, v146, vcc_hi
-; GFX11-NEXT: v_cmp_class_f64_e64 s12, v[38:39], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s14, v[48:49], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, v[50:51], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s18, v[52:53], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s20, v[54:55], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s21, v[64:65], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[96:97]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[98:99]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[100:101]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[102:103]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s11, 0, v[112:113]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s13, 0, v[114:115]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s15, 0, v[116:117]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s17, 0, v[118:119]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s19, 0, v[128:129]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s22, 0, v[130:131]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s23, 0, v[132:133]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s24, 0, v[134:135]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s25, 0, v[84:85]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s26, 0, v[86:87]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s27, 0, v[144:145]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s28, 0, v[146:147]
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v33, s6
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v103, v7 :: v_dual_cndmask_b32 v6, v102, v6
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[8:9], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v32, s6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v35, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, v0, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v98, v2, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v100, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, v1, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v99, v3, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v101, v5, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v103, v7, s9
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v113, v9 :: v_dual_cndmask_b32 v8, v112, v8
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[10:11], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v34, s8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v37, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v102, v6, s9
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v113, v9, s11
-; GFX11-NEXT: v_dual_cndmask_b32 v11, v115, v11 :: v_dual_cndmask_b32 v10, v114, v10
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[12:13], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v36, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v39, s12
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v112, v8, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v115, v11, s13
-; GFX11-NEXT: v_dual_cndmask_b32 v13, v117, v13 :: v_dual_cndmask_b32 v12, v116, v12
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[14:15], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v38, s12
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v49, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v114, v10, s13
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v117, v13, s15
-; GFX11-NEXT: v_dual_cndmask_b32 v15, v119, v15 :: v_dual_cndmask_b32 v14, v118, v14
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[16:17], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v48, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v51, s16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v116, v12, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v119, v15, s17
-; GFX11-NEXT: v_dual_cndmask_b32 v17, v129, v17 :: v_dual_cndmask_b32 v16, v128, v16
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[18:19], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v50, s16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v53, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v118, v14, s17
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v129, v17, s19
-; GFX11-NEXT: v_dual_cndmask_b32 v19, v131, v19 :: v_dual_cndmask_b32 v18, v130, v18
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[20:21], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, v52, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v19, v55, s20
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v128, v16, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v131, v19, s22
-; GFX11-NEXT: v_dual_cndmask_b32 v21, v133, v21 :: v_dual_cndmask_b32 v20, v132, v20
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v18, v54, s20
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v21, v65, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v130, v18, s22
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v133, v21, s23
-; GFX11-NEXT: v_dual_cndmask_b32 v23, v135, v23 :: v_dual_cndmask_b32 v22, v134, v22
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[24:25], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v20, v64, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v132, v20, s23
-; GFX11-NEXT: v_dual_cndmask_b32 v25, v85, v25 :: v_dual_cndmask_b32 v24, v84, v24
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[26:27], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v25, v25, v69, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v25, v85, v25, s25
-; GFX11-NEXT: v_dual_cndmask_b32 v27, v87, v27 :: v_dual_cndmask_b32 v26, v86, v26
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[28:29], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v24, v68, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v27, v27, v71, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v84, v24, s25
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v27, v87, v27, s26
-; GFX11-NEXT: v_dual_cndmask_b32 v29, v145, v29 :: v_dual_cndmask_b32 v28, v144, v28
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[30:31], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v26, v26, v70, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v29, v29, v81, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v26, v86, v26, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v29, v145, v29, s27
-; GFX11-NEXT: v_dual_cndmask_b32 v31, v147, v31 :: v_dual_cndmask_b32 v30, v146, v30
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[66:67], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v28, v28, v80, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v31, v31, v83, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v28, v144, v28, s27
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v31, v147, v31, s28
-; GFX11-NEXT: v_dual_cndmask_b32 v23, v23, v67 :: v_dual_cndmask_b32 v22, v22, v66
-; GFX11-NEXT: v_cndmask_b32_e64 v30, v30, v82, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v23, v135, v23, s24
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v134, v22, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v30, v146, v30, s28
+; GFX11-NEXT: v_min_f64 v[84:85], v[30:31], v[86:87]
+; GFX11-NEXT: v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v32, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v34, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v36, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v38, 0, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v10, v48, 0, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v50, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v14, v52, 0, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v16, v54, 0, s7
+; GFX11-NEXT: v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
+; GFX11-NEXT: v_cndmask_b32_e64 v18, v64, 0, s8
+; GFX11-NEXT: v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
+; GFX11-NEXT: v_cndmask_b32_e64 v20, v66, 0, s9
+; GFX11-NEXT: v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
+; GFX11-NEXT: v_cndmask_b32_e64 v22, v68, 0, s10
+; GFX11-NEXT: v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
+; GFX11-NEXT: v_cndmask_b32_e64 v24, v70, 0, s11
+; GFX11-NEXT: v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
+; GFX11-NEXT: v_cndmask_b32_e64 v26, v80, 0, s12
+; GFX11-NEXT: v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
+; GFX11-NEXT: v_cndmask_b32_e64 v28, v82, 0, s13
+; GFX11-NEXT: v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
+; GFX11-NEXT: v_cndmask_b32_e64 v30, v84, 0, s14
+; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
index d87eb97..e0ccda1 100644
--- a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
+++ b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
@@ -13,6 +13,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W64,GFX1030,GFX1030W64 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1151 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W32 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1151 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1152 < %s | FileCheck --check-prefixes=GCN,GFX1030,GFX1030W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1152 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX1030,GFX1030W64 %s
; GCN-LABEL: {{^}}max_occupancy:
; GFX9: ; Occupancy: 10
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index 4ecce28..6dda1fe 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -282,3 +282,168 @@ body: |
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
...
+
+---
+# GCN-LABEL: name: diffoporder_add_global_atomic_cmpswap
+# GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
+# GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+
+# GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+# GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+
+name: diffoporder_add_global_atomic_cmpswap
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ GLOBAL_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ GLOBAL_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_flat_atomic_cmpswap
+# GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
+# GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+
+# GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+# GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+
+name: diffoporder_add_flat_atomic_cmpswap
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ FLAT_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
+
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ FLAT_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_global_atomic_add
+# GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+
+# GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+
+name: diffoporder_add_global_atomic_add
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ GLOBAL_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ GLOBAL_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_flat_atomic_add
+# GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+
+# GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+
+name: diffoporder_add_flat_atomic_add
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ FLAT_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ FLAT_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_global_atomic_add_rtn
+# GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+
+# GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+
+name: diffoporder_add_global_atomic_add_rtn
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ %14:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ %15:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_flat_atomic_add_rtn
+# GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+
+# GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+
+name: diffoporder_add_flat_atomic_add_rtn
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ %14:vgpr_32 = FLAT_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ %15:vgpr_32 = FLAT_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+...
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
index fc00937..721114e 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
@@ -43,25 +43,6 @@ define i32 @test_tail_call(ptr addrspace(1) %out, ptr addrspace(1) %in) {
ret i32 %c
}
-declare void @external.varargs(i32, double, i64, ...)
-
-; GCN: error: <unknown>:0:0: in function test_call_varargs void (): unsupported call to variadic function external.varargs
-; R600: in function test_call_varargs{{.*}}: unsupported call to function external.varargs
-define void @test_call_varargs() {
- call void (i32, double, i64, ...) @external.varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0)
- ret void
-}
-
-declare i32 @extern_variadic(...)
-
-; GCN: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported required tail call to function extern_variadic
-; R600: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported call to function extern_variadic
-define i32 @test_tail_call_bitcast_extern_variadic(<4 x float> %arg0, <4 x float> %arg1, i32 %arg2) {
- %add = fadd <4 x float> %arg0, %arg1
- %call = tail call i32 @extern_variadic(<4 x float> %add)
- ret i32 %call
-}
-
; R600: in function test_c_call{{.*}}: unsupported call to function defined_function
define amdgpu_ps i32 @test_c_call_from_shader() {
%call = call i32 @defined_function(i32 0)
diff --git a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll
new file mode 100644
index 0000000..14ad1a1
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/neon_vabd.ll
@@ -0,0 +1,890 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+
+;
+; SABD
+;
+
+define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: sabd_8b:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s8 q8, d17, d16
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <8 x i8> %a to <8 x i16>
+ %b.sext = sext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a.sext, %b.sext
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ %trunc = trunc <8 x i16> %abs to <8 x i8>
+ ret <8 x i8> %trunc
+}
+
+define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: sabd_16b:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vsubl.s8 q10, d18, d17
+; CHECK-NEXT: vsubl.s8 q8, d19, d16
+; CHECK-NEXT: vabs.s16 q9, q10
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmovn.i16 d19, q9
+; CHECK-NEXT: vmovn.i16 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <16 x i8> %a to <16 x i16>
+ %b.sext = sext <16 x i8> %b to <16 x i16>
+ %sub = sub <16 x i16> %a.sext, %b.sext
+ %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
+ %trunc = trunc <16 x i16> %abs to <16 x i8>
+ ret <16 x i8> %trunc
+}
+
+define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: sabd_4h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s16 q8, d17, d16
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <4 x i16> %a to <4 x i32>
+ %b.sext = sext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a.sext, %b.sext
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ %trunc = trunc <4 x i32> %abs to <4 x i16>
+ ret <4 x i16> %trunc
+}
+
+define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: sabd_4h_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vshl.i16 d16, d16, #8
+; CHECK-NEXT: vshl.i16 d17, d17, #8
+; CHECK-NEXT: vshr.s16 d16, d16, #8
+; CHECK-NEXT: vshr.s16 d17, d17, #8
+; CHECK-NEXT: vsub.i16 d16, d17, d16
+; CHECK-NEXT: vabs.s16 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <4 x i8> %a to <4 x i16>
+ %b.sext = sext <4 x i8> %b to <4 x i16>
+ %sub = sub <4 x i16> %a.sext, %b.sext
+ %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
+ ret <4 x i16> %abs
+}
+
+define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: sabd_8h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vsubl.s16 q10, d18, d17
+; CHECK-NEXT: vsubl.s16 q8, d19, d16
+; CHECK-NEXT: vabs.s32 q9, q10
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmovn.i32 d19, q9
+; CHECK-NEXT: vmovn.i32 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <8 x i16> %a to <8 x i32>
+ %b.sext = sext <8 x i16> %b to <8 x i32>
+ %sub = sub <8 x i32> %a.sext, %b.sext
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
+ %trunc = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: sabd_8h_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s8 q8, d17, d16
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <8 x i8> %a to <8 x i16>
+ %b.sext = sext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a.sext, %b.sext
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ ret <8 x i16> %abs
+}
+
+define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: sabd_2s:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s32 q8, d17, d16
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmovn.i64 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <2 x i32> %a to <2 x i64>
+ %b.sext = sext <2 x i32> %b to <2 x i64>
+ %sub = sub <2 x i64> %a.sext, %b.sext
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ %trunc = trunc <2 x i64> %abs to <2 x i32>
+ ret <2 x i32> %trunc
+}
+
+define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: sabd_2s_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vshl.i32 d16, d16, #16
+; CHECK-NEXT: vshl.i32 d17, d17, #16
+; CHECK-NEXT: vshr.s32 d16, d16, #16
+; CHECK-NEXT: vshr.s32 d17, d17, #16
+; CHECK-NEXT: vsub.i32 d16, d17, d16
+; CHECK-NEXT: vabs.s32 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <2 x i16> %a to <2 x i32>
+ %b.sext = sext <2 x i16> %b to <2 x i32>
+ %sub = sub <2 x i32> %a.sext, %b.sext
+ %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
+ ret <2 x i32> %abs
+}
+
+define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: sabd_4s:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vsubl.s32 q10, d18, d17
+; CHECK-NEXT: vsubl.s32 q8, d19, d16
+; CHECK-NEXT: vshr.s64 q9, q10, #63
+; CHECK-NEXT: vshr.s64 q11, q8, #63
+; CHECK-NEXT: veor q10, q10, q9
+; CHECK-NEXT: veor q8, q8, q11
+; CHECK-NEXT: vsub.i64 q9, q10, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q11
+; CHECK-NEXT: vmovn.i64 d19, q9
+; CHECK-NEXT: vmovn.i64 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <4 x i32> %a to <4 x i64>
+ %b.sext = sext <4 x i32> %b to <4 x i64>
+ %sub = sub <4 x i64> %a.sext, %b.sext
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
+ %trunc = trunc <4 x i64> %abs to <4 x i32>
+ ret <4 x i32> %trunc
+}
+
+define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: sabd_4s_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s16 q8, d17, d16
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <4 x i16> %a to <4 x i32>
+ %b.sext = sext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a.sext, %b.sext
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ ret <4 x i32> %abs
+}
+
+define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: sabd_2d:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: add r12, sp, #24
+; CHECK-NEXT: asr r6, r3, #31
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov r12, lr, d17
+; CHECK-NEXT: vmov r7, r5, d16
+; CHECK-NEXT: subs r2, r2, r12
+; CHECK-NEXT: sbcs r3, r3, lr
+; CHECK-NEXT: sbcs r4, r6, lr, asr #31
+; CHECK-NEXT: sbc r6, r6, lr, asr #31
+; CHECK-NEXT: eor r2, r2, r6, asr #31
+; CHECK-NEXT: eor r3, r3, r6, asr #31
+; CHECK-NEXT: subs r2, r2, r6, asr #31
+; CHECK-NEXT: sbc r3, r3, r6, asr #31
+; CHECK-NEXT: subs r0, r0, r7
+; CHECK-NEXT: asr r6, r1, #31
+; CHECK-NEXT: sbcs r1, r1, r5
+; CHECK-NEXT: sbcs r7, r6, r5, asr #31
+; CHECK-NEXT: vmov.32 d17[0], r2
+; CHECK-NEXT: sbc r7, r6, r5, asr #31
+; CHECK-NEXT: eor r0, r0, r7, asr #31
+; CHECK-NEXT: subs r0, r0, r7, asr #31
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: eor r0, r1, r7, asr #31
+; CHECK-NEXT: sbc r0, r0, r7, asr #31
+; CHECK-NEXT: vmov.32 d17[1], r3
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <2 x i64> %a to <2 x i128>
+ %b.sext = sext <2 x i64> %b to <2 x i128>
+ %sub = sub <2 x i128> %a.sext, %b.sext
+ %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
+ %trunc = trunc <2 x i128> %abs to <2 x i64>
+ ret <2 x i64> %trunc
+}
+
+define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: sabd_2d_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s32 q8, d17, d16
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <2 x i32> %a to <2 x i64>
+ %b.sext = sext <2 x i32> %b to <2 x i64>
+ %sub = sub <2 x i64> %a.sext, %b.sext
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ ret <2 x i64> %abs
+}
+
+;
+; UABD
+;
+
+define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: uabd_8b:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u8 q8, d17, d16
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <8 x i8> %a to <8 x i16>
+ %b.zext = zext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a.zext, %b.zext
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ %trunc = trunc <8 x i16> %abs to <8 x i8>
+ ret <8 x i8> %trunc
+}
+
+define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: uabd_16b:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vabdl.u8 q10, d18, d17
+; CHECK-NEXT: vabdl.u8 q8, d19, d16
+; CHECK-NEXT: vmovn.i16 d19, q10
+; CHECK-NEXT: vmovn.i16 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <16 x i8> %a to <16 x i16>
+ %b.zext = zext <16 x i8> %b to <16 x i16>
+ %sub = sub <16 x i16> %a.zext, %b.zext
+ %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
+ %trunc = trunc <16 x i16> %abs to <16 x i8>
+ ret <16 x i8> %trunc
+}
+
+define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: uabd_4h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u16 q8, d17, d16
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <4 x i16> %a to <4 x i32>
+ %b.zext = zext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a.zext, %b.zext
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ %trunc = trunc <4 x i32> %abs to <4 x i16>
+ ret <4 x i16> %trunc
+}
+
+define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: uabd_4h_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vbic.i16 d16, #0xff00
+; CHECK-NEXT: vbic.i16 d17, #0xff00
+; CHECK-NEXT: vsub.i16 d16, d17, d16
+; CHECK-NEXT: vabs.s16 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <4 x i8> %a to <4 x i16>
+ %b.zext = zext <4 x i8> %b to <4 x i16>
+ %sub = sub <4 x i16> %a.zext, %b.zext
+ %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
+ ret <4 x i16> %abs
+}
+
+define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: uabd_8h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vabdl.u16 q10, d18, d17
+; CHECK-NEXT: vabdl.u16 q8, d19, d16
+; CHECK-NEXT: vmovn.i32 d19, q10
+; CHECK-NEXT: vmovn.i32 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <8 x i16> %a to <8 x i32>
+ %b.zext = zext <8 x i16> %b to <8 x i32>
+ %sub = sub <8 x i32> %a.zext, %b.zext
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
+ %trunc = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: uabd_8h_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u8 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <8 x i8> %a to <8 x i16>
+ %b.zext = zext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a.zext, %b.zext
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ ret <8 x i16> %abs
+}
+
+define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: uabd_2s:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.u32 q8, d17, d16
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmovn.i64 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <2 x i32> %a to <2 x i64>
+ %b.zext = zext <2 x i32> %b to <2 x i64>
+ %sub = sub <2 x i64> %a.zext, %b.zext
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ %trunc = trunc <2 x i64> %abs to <2 x i32>
+ ret <2 x i32> %trunc
+}
+
+define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: uabd_2s_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.i32 d16, #0xffff
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vand d17, d17, d16
+; CHECK-NEXT: vand d16, d18, d16
+; CHECK-NEXT: vsub.i32 d16, d16, d17
+; CHECK-NEXT: vabs.s32 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <2 x i16> %a to <2 x i32>
+ %b.zext = zext <2 x i16> %b to <2 x i32>
+ %sub = sub <2 x i32> %a.zext, %b.zext
+ %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
+ ret <2 x i32> %abs
+}
+
+define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: uabd_4s:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vsubl.u32 q10, d18, d17
+; CHECK-NEXT: vsubl.u32 q8, d19, d16
+; CHECK-NEXT: vshr.s64 q9, q10, #63
+; CHECK-NEXT: vshr.s64 q11, q8, #63
+; CHECK-NEXT: veor q10, q10, q9
+; CHECK-NEXT: veor q8, q8, q11
+; CHECK-NEXT: vsub.i64 q9, q10, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q11
+; CHECK-NEXT: vmovn.i64 d19, q9
+; CHECK-NEXT: vmovn.i64 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <4 x i32> %a to <4 x i64>
+ %b.zext = zext <4 x i32> %b to <4 x i64>
+ %sub = sub <4 x i64> %a.zext, %b.zext
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
+ %trunc = trunc <4 x i64> %abs to <4 x i32>
+ ret <4 x i32> %trunc
+}
+
+define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: uabd_4s_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u16 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <4 x i16> %a to <4 x i32>
+ %b.zext = zext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a.zext, %b.zext
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ ret <4 x i32> %abs
+}
+
+define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: uabd_2d:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: add r12, sp, #24
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov r12, lr, d17
+; CHECK-NEXT: vmov r4, r7, d16
+; CHECK-NEXT: subs r2, r2, r12
+; CHECK-NEXT: sbcs r3, r3, lr
+; CHECK-NEXT: sbcs r5, r6, #0
+; CHECK-NEXT: sbc r5, r6, #0
+; CHECK-NEXT: eor r2, r2, r5, asr #31
+; CHECK-NEXT: eor r3, r3, r5, asr #31
+; CHECK-NEXT: subs r2, r2, r5, asr #31
+; CHECK-NEXT: sbc r3, r3, r5, asr #31
+; CHECK-NEXT: subs r0, r0, r4
+; CHECK-NEXT: sbcs r1, r1, r7
+; CHECK-NEXT: vmov.32 d17[0], r2
+; CHECK-NEXT: sbcs r7, r6, #0
+; CHECK-NEXT: sbc r7, r6, #0
+; CHECK-NEXT: eor r0, r0, r7, asr #31
+; CHECK-NEXT: subs r0, r0, r7, asr #31
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: eor r0, r1, r7, asr #31
+; CHECK-NEXT: sbc r0, r0, r7, asr #31
+; CHECK-NEXT: vmov.32 d17[1], r3
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <2 x i64> %a to <2 x i128>
+ %b.zext = zext <2 x i64> %b to <2 x i128>
+ %sub = sub <2 x i128> %a.zext, %b.zext
+ %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
+ %trunc = trunc <2 x i128> %abs to <2 x i64>
+ ret <2 x i64> %trunc
+}
+
+define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: uabd_2d_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.u32 q8, d17, d16
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <2 x i32> %a to <2 x i64>
+ %b.zext = zext <2 x i32> %b to <2 x i64>
+ %sub = sub <2 x i64> %a.zext, %b.zext
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ ret <2 x i64> %abs
+}
+
+define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: uabd_v16i8_nuw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i8 q8, q8, q9
+; CHECK-NEXT: vabs.s8 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nuw <16 x i8> %a, %b
+ %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
+ ret <16 x i8> %abs
+}
+
+define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: uabd_v8i16_nuw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i16 q8, q8, q9
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nuw <8 x i16> %a, %b
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ ret <8 x i16> %abs
+}
+
+define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: uabd_v4i32_nuw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i32 q8, q8, q9
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nuw <4 x i32> %a, %b
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ ret <4 x i32> %abs
+}
+
+define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: uabd_v2i64_nuw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nuw <2 x i64> %a, %b
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ ret <2 x i64> %abs
+}
+
+define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: sabd_v16i8_nsw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i8 q8, q8, q9
+; CHECK-NEXT: vabs.s8 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nsw <16 x i8> %a, %b
+ %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
+ ret <16 x i8> %abs
+}
+
+define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: sabd_v8i16_nsw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i16 q8, q8, q9
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nsw <8 x i16> %a, %b
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ ret <8 x i16> %abs
+}
+
+define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: sabd_v4i32_nsw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i32 q8, q8, q9
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nsw <4 x i32> %a, %b
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ ret <4 x i32> %abs
+}
+
+define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: sabd_v2i64_nsw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nsw <2 x i64> %a, %b
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ ret <2 x i64> %abs
+}
+
+define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: smaxmin_v16i8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.s8 q10, q8, q9
+; CHECK-NEXT: vmax.s8 q8, q8, q9
+; CHECK-NEXT: vsub.i8 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %sub = sub <16 x i8> %a, %b
+ ret <16 x i8> %sub
+}
+
+define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
+; CHECK-LABEL: smaxmin_v8i16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.s16 q10, q8, q9
+; CHECK-NEXT: vmax.s16 q8, q8, q9
+; CHECK-NEXT: vsub.i16 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
+ %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
+ %sub = sub <8 x i16> %a, %b
+ ret <8 x i16> %sub
+}
+
+define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
+; CHECK-LABEL: smaxmin_v4i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.s32 q10, q8, q9
+; CHECK-NEXT: vmax.s32 q8, q8, q9
+; CHECK-NEXT: vsub.i32 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
+ %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
+ %sub = sub <4 x i32> %a, %b
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
+; CHECK-LABEL: smaxmin_v2i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: add r6, sp, #24
+; CHECK-NEXT: mov r8, #0
+; CHECK-NEXT: vld1.64 {d18, d19}, [r6]
+; CHECK-NEXT: vmov r7, r12, d19
+; CHECK-NEXT: vmov r4, lr, d18
+; CHECK-NEXT: subs r5, r2, r7
+; CHECK-NEXT: sbcs r5, r3, r12
+; CHECK-NEXT: mov r6, r7
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: movne r6, r2
+; CHECK-NEXT: mov r5, r12
+; CHECK-NEXT: vmov.32 d17[0], r6
+; CHECK-NEXT: movne r5, r3
+; CHECK-NEXT: mov r6, r4
+; CHECK-NEXT: vmov.32 d17[1], r5
+; CHECK-NEXT: subs r5, r4, r0
+; CHECK-NEXT: sbcs r5, lr, r1
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: vmov.32 d18[0], r6
+; CHECK-NEXT: subs r6, r7, r2
+; CHECK-NEXT: sbcs r6, r12, r3
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movlt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movne r7, r2
+; CHECK-NEXT: subs r2, r0, r4
+; CHECK-NEXT: sbcs r2, r1, lr
+; CHECK-NEXT: vmov.32 d19[0], r7
+; CHECK-NEXT: movlt r8, #1
+; CHECK-NEXT: cmp r8, #0
+; CHECK-NEXT: movne r4, r0
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: vmov.32 d16[0], r4
+; CHECK-NEXT: movne r0, r1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movne r12, r3
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: movne lr, r1
+; CHECK-NEXT: vmov.32 d19[1], r12
+; CHECK-NEXT: vmov.32 d18[1], lr
+; CHECK-NEXT: vsub.i64 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
+ %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
+ %sub = sub <2 x i64> %a, %b
+ ret <2 x i64> %sub
+}
+
+define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: umaxmin_v16i8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.u8 q10, q8, q9
+; CHECK-NEXT: vmax.u8 q8, q8, q9
+; CHECK-NEXT: vsub.i8 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %sub = sub <16 x i8> %a, %b
+ ret <16 x i8> %sub
+}
+
+define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
+; CHECK-LABEL: umaxmin_v8i16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.u16 q10, q8, q9
+; CHECK-NEXT: vmax.u16 q8, q8, q9
+; CHECK-NEXT: vsub.i16 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
+ %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
+ %sub = sub <8 x i16> %a, %b
+ ret <8 x i16> %sub
+}
+
+define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
+; CHECK-LABEL: umaxmin_v4i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.u32 q10, q8, q9
+; CHECK-NEXT: vmax.u32 q8, q8, q9
+; CHECK-NEXT: vsub.i32 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
+ %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
+ %sub = sub <4 x i32> %a, %b
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
+; CHECK-LABEL: umaxmin_v2i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vqsub.u64 q10, q8, q9
+; CHECK-NEXT: vqsub.u64 q9, q9, q8
+; CHECK-NEXT: vsub.i64 q10, q10, q8
+; CHECK-NEXT: vadd.i64 q8, q8, q9
+; CHECK-NEXT: vadd.i64 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
+ %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
+ %sub = sub <2 x i64> %a, %b
+ ret <2 x i64> %sub
+}
+
+define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: umaxmin_v16i8_com1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vmin.u8 q10, q9, q8
+; CHECK-NEXT: vmax.u8 q8, q8, q9
+; CHECK-NEXT: vsub.i8 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
+ %sub = sub <16 x i8> %a, %b
+ ret <16 x i8> %sub
+}
diff --git a/llvm/test/CodeGen/ARM/vaba.ll b/llvm/test/CodeGen/ARM/vaba.ll
index e4a61ea..14419a3 100644
--- a/llvm/test/CodeGen/ARM/vaba.ll
+++ b/llvm/test/CodeGen/ARM/vaba.ll
@@ -1,8 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
define <8 x i8> @vabas8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabas8:
-;CHECK: vaba.s8
+; CHECK-LABEL: vabas8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.s8 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -12,8 +19,14 @@ define <8 x i8> @vabas8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i16> @vabas16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabas16:
-;CHECK: vaba.s16
+; CHECK-LABEL: vabas16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.s16 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -23,8 +36,14 @@ define <4 x i16> @vabas16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <2 x i32> @vabas32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabas32:
-;CHECK: vaba.s32
+; CHECK-LABEL: vabas32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.s32 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -34,8 +53,14 @@ define <2 x i32> @vabas32(ptr %A, ptr %B, ptr %C) nounwind {
}
define <8 x i8> @vabau8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabau8:
-;CHECK: vaba.u8
+; CHECK-LABEL: vabau8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.u8 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -45,8 +70,14 @@ define <8 x i8> @vabau8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i16> @vabau16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabau16:
-;CHECK: vaba.u16
+; CHECK-LABEL: vabau16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.u16 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -56,8 +87,14 @@ define <4 x i16> @vabau16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <2 x i32> @vabau32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabau32:
-;CHECK: vaba.u32
+; CHECK-LABEL: vabau32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.u32 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -67,8 +104,15 @@ define <2 x i32> @vabau32(ptr %A, ptr %B, ptr %C) nounwind {
}
define <16 x i8> @vabaQs8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQs8:
-;CHECK: vaba.s8
+; CHECK-LABEL: vabaQs8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.s8 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = load <16 x i8>, ptr %C
@@ -78,8 +122,15 @@ define <16 x i8> @vabaQs8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <8 x i16> @vabaQs16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQs16:
-;CHECK: vaba.s16
+; CHECK-LABEL: vabaQs16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.s16 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -89,8 +140,15 @@ define <8 x i16> @vabaQs16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i32> @vabaQs32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQs32:
-;CHECK: vaba.s32
+; CHECK-LABEL: vabaQs32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.s32 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -100,8 +158,15 @@ define <4 x i32> @vabaQs32(ptr %A, ptr %B, ptr %C) nounwind {
}
define <16 x i8> @vabaQu8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQu8:
-;CHECK: vaba.u8
+; CHECK-LABEL: vabaQu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.u8 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = load <16 x i8>, ptr %C
@@ -111,8 +176,15 @@ define <16 x i8> @vabaQu8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <8 x i16> @vabaQu16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQu16:
-;CHECK: vaba.u16
+; CHECK-LABEL: vabaQu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.u16 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -122,8 +194,15 @@ define <8 x i16> @vabaQu16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i32> @vabaQu32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQu32:
-;CHECK: vaba.u32
+; CHECK-LABEL: vabaQu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.u32 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -149,8 +228,15 @@ declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind read
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @vabals8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabals8:
-;CHECK: vabal.s8
+; CHECK-LABEL: vabals8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.s8 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -161,8 +247,15 @@ define <8 x i16> @vabals8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i32> @vabals16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabals16:
-;CHECK: vabal.s16
+; CHECK-LABEL: vabals16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.s16 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -173,8 +266,15 @@ define <4 x i32> @vabals16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <2 x i64> @vabals32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabals32:
-;CHECK: vabal.s32
+; CHECK-LABEL: vabals32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.s32 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -185,8 +285,15 @@ define <2 x i64> @vabals32(ptr %A, ptr %B, ptr %C) nounwind {
}
define <8 x i16> @vabalu8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabalu8:
-;CHECK: vabal.u8
+; CHECK-LABEL: vabalu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.u8 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -197,8 +304,15 @@ define <8 x i16> @vabalu8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i32> @vabalu16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabalu16:
-;CHECK: vabal.u16
+; CHECK-LABEL: vabalu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.u16 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -209,8 +323,15 @@ define <4 x i32> @vabalu16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <2 x i64> @vabalu32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabalu32:
-;CHECK: vabal.u32
+; CHECK-LABEL: vabalu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.u32 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
diff --git a/llvm/test/CodeGen/ARM/vabd.ll b/llvm/test/CodeGen/ARM/vabd.ll
index eb5eed8..4184e92 100644
--- a/llvm/test/CodeGen/ARM/vabd.ll
+++ b/llvm/test/CodeGen/ARM/vabd.ll
@@ -1,8 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
define <8 x i8> @vabds8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabds8:
-;CHECK: vabd.s8
+; CHECK-LABEL: vabds8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.s8 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -10,8 +16,13 @@ define <8 x i8> @vabds8(ptr %A, ptr %B) nounwind {
}
define <4 x i16> @vabds16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabds16:
-;CHECK: vabd.s16
+; CHECK-LABEL: vabds16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.s16 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -19,8 +30,13 @@ define <4 x i16> @vabds16(ptr %A, ptr %B) nounwind {
}
define <2 x i32> @vabds32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabds32:
-;CHECK: vabd.s32
+; CHECK-LABEL: vabds32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.s32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,8 +44,13 @@ define <2 x i32> @vabds32(ptr %A, ptr %B) nounwind {
}
define <8 x i8> @vabdu8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdu8:
-;CHECK: vabd.u8
+; CHECK-LABEL: vabdu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.u8 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -37,8 +58,13 @@ define <8 x i8> @vabdu8(ptr %A, ptr %B) nounwind {
}
define <4 x i16> @vabdu16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdu16:
-;CHECK: vabd.u16
+; CHECK-LABEL: vabdu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.u16 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -46,8 +72,13 @@ define <4 x i16> @vabdu16(ptr %A, ptr %B) nounwind {
}
define <2 x i32> @vabdu32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdu32:
-;CHECK: vabd.u32
+; CHECK-LABEL: vabdu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.u32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -55,8 +86,13 @@ define <2 x i32> @vabdu32(ptr %A, ptr %B) nounwind {
}
define <2 x float> @vabdf32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdf32:
-;CHECK: vabd.f32
+; CHECK-LABEL: vabdf32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.f32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x float>, ptr %A
%tmp2 = load <2 x float>, ptr %B
%tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
@@ -64,8 +100,14 @@ define <2 x float> @vabdf32(ptr %A, ptr %B) nounwind {
}
define <16 x i8> @vabdQs8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQs8:
-;CHECK: vabd.s8
+; CHECK-LABEL: vabdQs8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.s8 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -73,8 +115,14 @@ define <16 x i8> @vabdQs8(ptr %A, ptr %B) nounwind {
}
define <8 x i16> @vabdQs16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQs16:
-;CHECK: vabd.s16
+; CHECK-LABEL: vabdQs16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.s16 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -82,8 +130,14 @@ define <8 x i16> @vabdQs16(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @vabdQs32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQs32:
-;CHECK: vabd.s32
+; CHECK-LABEL: vabdQs32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.s32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -91,8 +145,14 @@ define <4 x i32> @vabdQs32(ptr %A, ptr %B) nounwind {
}
define <16 x i8> @vabdQu8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQu8:
-;CHECK: vabd.u8
+; CHECK-LABEL: vabdQu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.u8 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,8 +160,14 @@ define <16 x i8> @vabdQu8(ptr %A, ptr %B) nounwind {
}
define <8 x i16> @vabdQu16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQu16:
-;CHECK: vabd.u16
+; CHECK-LABEL: vabdQu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.u16 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -109,8 +175,14 @@ define <8 x i16> @vabdQu16(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @vabdQu32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQu32:
-;CHECK: vabd.u32
+; CHECK-LABEL: vabdQu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.u32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -118,8 +190,14 @@ define <4 x i32> @vabdQu32(ptr %A, ptr %B) nounwind {
}
define <4 x float> @vabdQf32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQf32:
-;CHECK: vabd.f32
+; CHECK-LABEL: vabdQf32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.f32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x float>, ptr %A
%tmp2 = load <4 x float>, ptr %B
%tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
@@ -147,8 +225,14 @@ declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind read
declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <8 x i16> @vabdls8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdls8:
-;CHECK: vabdl.s8
+; CHECK-LABEL: vabdls8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.s8 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -157,8 +241,14 @@ define <8 x i16> @vabdls8(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @vabdls16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdls16:
-;CHECK: vabdl.s16
+; CHECK-LABEL: vabdls16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.s16 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -167,8 +257,14 @@ define <4 x i32> @vabdls16(ptr %A, ptr %B) nounwind {
}
define <2 x i64> @vabdls32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdls32:
-;CHECK: vabdl.s32
+; CHECK-LABEL: vabdls32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.s32 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -177,8 +273,14 @@ define <2 x i64> @vabdls32(ptr %A, ptr %B) nounwind {
}
define <8 x i16> @vabdlu8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdlu8:
-;CHECK: vabdl.u8
+; CHECK-LABEL: vabdlu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.u8 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -187,8 +289,14 @@ define <8 x i16> @vabdlu8(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @vabdlu16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdlu16:
-;CHECK: vabdl.u16
+; CHECK-LABEL: vabdlu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.u16 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -197,8 +305,14 @@ define <4 x i32> @vabdlu16(ptr %A, ptr %B) nounwind {
}
define <2 x i64> @vabdlu32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdlu32:
-;CHECK: vabdl.u32
+; CHECK-LABEL: vabdlu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.u32 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
new file mode 100644
index 0000000..75f920b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -0,0 +1,326 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+define void @test_load_store(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_load_store:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_load_store:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: st.h $a0, $a1, 0
+; LA64-NEXT: ret
+ %a = load half, ptr %p
+ store half %a, ptr %q
+ ret void
+}
+
+define float @test_fpextend_float(ptr %p) nounwind {
+; LA32-LABEL: test_fpextend_float:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: b %plt(__gnu_h2f_ieee)
+;
+; LA64-LABEL: test_fpextend_float:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: b %plt(__gnu_h2f_ieee)
+ %a = load half, ptr %p
+ %r = fpext half %a to float
+ ret float %r
+}
+
+define double @test_fpextend_double(ptr %p) nounwind {
+; LA32-LABEL: test_fpextend_double:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fcvt.d.s $fa0, $fa0
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fpextend_double:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fcvt.d.s $fa0, $fa0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %a = load half, ptr %p
+ %r = fpext half %a to double
+ ret double %r
+}
+
+define void @test_fptrunc_float(float %f, ptr %p) nounwind {
+; LA32-LABEL: test_fptrunc_float:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: st.h $a0, $fp, 0
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fptrunc_float:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: st.h $a0, $fp, 0
+; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %a = fptrunc float %f to half
+ store half %a, ptr %p
+ ret void
+}
+
+define void @test_fptrunc_double(double %d, ptr %p) nounwind {
+; LA32-LABEL: test_fptrunc_double:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: bl %plt(__truncdfhf2)
+; LA32-NEXT: st.h $a0, $fp, 0
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fptrunc_double:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: bl %plt(__truncdfhf2)
+; LA64-NEXT: st.h $a0, $fp, 0
+; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %a = fptrunc double %d to half
+ store half %a, ptr %p
+ ret void
+}
+
+define half @test_fadd_reg(half %a, half %b) nounwind {
+; LA32-LABEL: test_fadd_reg:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: fmov.s $fa0, $fa1
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fadd.s $fa0, $fa0, $fs1
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fadd_reg:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: fmov.s $fa0, $fa1
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fadd.s $fa0, $fa0, $fs1
+; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %r = fadd half %a, %b
+ ret half %r
+}
+
+define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_fadd_mem:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a1
+; LA32-NEXT: move $s0, $a0
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: ld.hu $a0, $fp, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fadd.s $fa0, $fs0, $fa0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: st.h $a0, $s0, 0
+; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fadd_mem:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a1
+; LA64-NEXT: move $s0, $a0
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: ld.hu $a0, $fp, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fadd.s $fa0, $fs0, $fa0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: st.h $a0, $s0, 0
+; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %a = load half, ptr %p
+ %b = load half, ptr %q
+ %r = fadd half %a, %b
+ store half %r, ptr %p
+ ret void
+}
+
+define half @test_fmul_reg(half %a, half %b) nounwind {
+; LA32-LABEL: test_fmul_reg:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: fmov.s $fa0, $fa1
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmul.s $fa0, $fa0, $fs1
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fmul_reg:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: fmov.s $fa0, $fa1
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmul.s $fa0, $fa0, $fs1
+; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %r = fmul half %a, %b
+ ret half %r
+}
+
+define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_fmul_mem:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a1
+; LA32-NEXT: move $s0, $a0
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: ld.hu $a0, $fp, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmul.s $fa0, $fs0, $fa0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: st.h $a0, $s0, 0
+; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fmul_mem:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a1
+; LA64-NEXT: move $s0, $a0
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: ld.hu $a0, $fp, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmul.s $fa0, $fs0, $fa0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: st.h $a0, $s0, 0
+; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %a = load half, ptr %p
+ %b = load half, ptr %q
+ %r = fmul half %a, %b
+ store half %r, ptr %p
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/sextw-removal.ll b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
index 502b6cf..ba889fc 100644
--- a/llvm/test/CodeGen/LoongArch/sextw-removal.ll
+++ b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
@@ -762,7 +762,6 @@ define signext i32 @test14(i32 signext %0, i32 signext %1) {
; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: ori $a3, $zero, 1
; CHECK-NEXT: addi.w $a2, $zero, -1
-; CHECK-NEXT: lu32i.d $a2, 0
; CHECK-NEXT: ori $a4, $zero, 1000
; CHECK-NEXT: .p2align 4, , 16
; CHECK-NEXT: .LBB13_2: # =>This Inner Loop Header: Depth=1
@@ -772,10 +771,9 @@ define signext i32 @test14(i32 signext %0, i32 signext %1) {
; CHECK-NEXT: addi.w $a3, $a3, 1
; CHECK-NEXT: blt $a3, $a1, .LBB13_2
; CHECK-NEXT: .LBB13_4:
-; CHECK-NEXT: addi.w $a0, $a0, 0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB13_5:
-; CHECK-NEXT: addi.w $a0, $a2, 0
+; CHECK-NEXT: move $a0, $a2
; CHECK-NEXT: ret
;
; NORMV-LABEL: test14:
@@ -785,7 +783,6 @@ define signext i32 @test14(i32 signext %0, i32 signext %1) {
; NORMV-NEXT: # %bb.1: # %.preheader
; NORMV-NEXT: ori $a3, $zero, 1
; NORMV-NEXT: addi.w $a2, $zero, -1
-; NORMV-NEXT: lu32i.d $a2, 0
; NORMV-NEXT: ori $a4, $zero, 1000
; NORMV-NEXT: .p2align 4, , 16
; NORMV-NEXT: .LBB13_2: # =>This Inner Loop Header: Depth=1
@@ -795,13 +792,12 @@ define signext i32 @test14(i32 signext %0, i32 signext %1) {
; NORMV-NEXT: add.d $a0, $a3, $a0
; NORMV-NEXT: addi.d $a3, $a3, 1
; NORMV-NEXT: addi.w $a3, $a3, 0
-; NORMV-NEXT: addi.d $a0, $a0, 0
+; NORMV-NEXT: addi.w $a0, $a0, 0
; NORMV-NEXT: blt $a3, $a1, .LBB13_2
; NORMV-NEXT: .LBB13_4:
-; NORMV-NEXT: addi.w $a0, $a0, 0
; NORMV-NEXT: ret
; NORMV-NEXT: .LBB13_5:
-; NORMV-NEXT: addi.w $a0, $a2, 0
+; NORMV-NEXT: move $a0, $a2
; NORMV-NEXT: ret
%3 = icmp sgt i32 %1, 1
br i1 %3, label %4, label %12
@@ -830,8 +826,7 @@ define signext i32 @test14b(i32 %0, i32 signext %1) {
; CHECK-NEXT: blt $a1, $a2, .LBB14_4
; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: ori $a3, $zero, 1
-; CHECK-NEXT: addi.w $a2, $zero, -1
-; CHECK-NEXT: lu32i.d $a2, 0
+; CHECK-NEXT: addi.d $a2, $zero, -1
; CHECK-NEXT: ori $a4, $zero, 1000
; CHECK-NEXT: .p2align 4, , 16
; CHECK-NEXT: .LBB14_2: # =>This Inner Loop Header: Depth=1
@@ -854,8 +849,7 @@ define signext i32 @test14b(i32 %0, i32 signext %1) {
; NORMV-NEXT: blt $a1, $a2, .LBB14_4
; NORMV-NEXT: # %bb.1: # %.preheader
; NORMV-NEXT: ori $a3, $zero, 1
-; NORMV-NEXT: addi.w $a2, $zero, -1
-; NORMV-NEXT: lu32i.d $a2, 0
+; NORMV-NEXT: addi.d $a2, $zero, -1
; NORMV-NEXT: ori $a4, $zero, 1000
; NORMV-NEXT: .p2align 4, , 16
; NORMV-NEXT: .LBB14_2: # =>This Inner Loop Header: Depth=1
@@ -900,8 +894,7 @@ define signext i32 @test14c(i32 zeroext %0, i32 signext %1) {
; CHECK-NEXT: blt $a1, $a2, .LBB15_4
; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: ori $a3, $zero, 1
-; CHECK-NEXT: addi.w $a2, $zero, -1
-; CHECK-NEXT: lu32i.d $a2, 0
+; CHECK-NEXT: addi.d $a2, $zero, -1
; CHECK-NEXT: ori $a4, $zero, 1000
; CHECK-NEXT: .p2align 4, , 16
; CHECK-NEXT: .LBB15_2: # =>This Inner Loop Header: Depth=1
@@ -924,8 +917,7 @@ define signext i32 @test14c(i32 zeroext %0, i32 signext %1) {
; NORMV-NEXT: blt $a1, $a2, .LBB15_4
; NORMV-NEXT: # %bb.1: # %.preheader
; NORMV-NEXT: ori $a3, $zero, 1
-; NORMV-NEXT: addi.w $a2, $zero, -1
-; NORMV-NEXT: lu32i.d $a2, 0
+; NORMV-NEXT: addi.d $a2, $zero, -1
; NORMV-NEXT: ori $a4, $zero, 1000
; NORMV-NEXT: .p2align 4, , 16
; NORMV-NEXT: .LBB15_2: # =>This Inner Loop Header: Depth=1
@@ -971,7 +963,6 @@ define signext i32 @test14d(i31 zeroext %0, i32 signext %1) {
; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: ori $a3, $zero, 1
; CHECK-NEXT: addi.w $a2, $zero, -1
-; CHECK-NEXT: lu32i.d $a2, 0
; CHECK-NEXT: ori $a4, $zero, 1000
; CHECK-NEXT: .p2align 4, , 16
; CHECK-NEXT: .LBB16_2: # =>This Inner Loop Header: Depth=1
@@ -981,10 +972,9 @@ define signext i32 @test14d(i31 zeroext %0, i32 signext %1) {
; CHECK-NEXT: addi.w $a3, $a3, 1
; CHECK-NEXT: blt $a3, $a1, .LBB16_2
; CHECK-NEXT: .LBB16_4:
-; CHECK-NEXT: addi.w $a0, $a0, 0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB16_5:
-; CHECK-NEXT: addi.w $a0, $a2, 0
+; CHECK-NEXT: move $a0, $a2
; CHECK-NEXT: ret
;
; NORMV-LABEL: test14d:
@@ -994,7 +984,6 @@ define signext i32 @test14d(i31 zeroext %0, i32 signext %1) {
; NORMV-NEXT: # %bb.1: # %.preheader
; NORMV-NEXT: ori $a3, $zero, 1
; NORMV-NEXT: addi.w $a2, $zero, -1
-; NORMV-NEXT: lu32i.d $a2, 0
; NORMV-NEXT: ori $a4, $zero, 1000
; NORMV-NEXT: .p2align 4, , 16
; NORMV-NEXT: .LBB16_2: # =>This Inner Loop Header: Depth=1
@@ -1004,13 +993,12 @@ define signext i32 @test14d(i31 zeroext %0, i32 signext %1) {
; NORMV-NEXT: add.d $a0, $a3, $a0
; NORMV-NEXT: addi.d $a3, $a3, 1
; NORMV-NEXT: addi.w $a3, $a3, 0
-; NORMV-NEXT: addi.d $a0, $a0, 0
+; NORMV-NEXT: addi.w $a0, $a0, 0
; NORMV-NEXT: blt $a3, $a1, .LBB16_2
; NORMV-NEXT: .LBB16_4:
-; NORMV-NEXT: addi.w $a0, $a0, 0
; NORMV-NEXT: ret
; NORMV-NEXT: .LBB16_5:
-; NORMV-NEXT: addi.w $a0, $a2, 0
+; NORMV-NEXT: move $a0, $a2
; NORMV-NEXT: ret
%zext = zext i31 %0 to i32
%3 = icmp sgt i32 %1, 1
diff --git a/llvm/test/CodeGen/NVPTX/intr-range.ll b/llvm/test/CodeGen/NVPTX/intr-range.ll
new file mode 100644
index 0000000..2f3e08a
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/intr-range.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
+; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -mcpu=sm_20 -passes=nvvm-intr-range | FileCheck %s
+
+define i32 @test_maxntid() {
+; CHECK-LABEL: define i32 @test_maxntid(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+; CHECK-NEXT: [[TMP11:%.*]] = call range(i32 1, 97) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 1, 97) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 65) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP10]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ %7 = add i32 %1, %2
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %4
+ %10 = add i32 %9, %5
+ %11 = add i32 %10, %6
+ ret i32 %11
+}
+
+define i32 @test_reqntid() {
+; CHECK-LABEL: define i32 @test_reqntid(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ %7 = add i32 %1, %2
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %4
+ %10 = add i32 %9, %5
+ %11 = add i32 %10, %6
+ ret i32 %5
+}
+
+;; A case like this could occur if a function with the sreg intrinsic was
+;; inlined into a kernel where the tid metadata is present, ensure the range is
+;; updated.
+define i32 @test_inlined() {
+; CHECK-LABEL: define i32 @test_inlined(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 4) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ ret i32 %1
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+
+!nvvm.annotations = !{!0, !1, !2}
+!0 = !{ptr @test_maxntid, !"kernel", i32 1, !"maxntidx", i32 32, !"maxntidz", i32 3}
+!1 = !{ptr @test_reqntid, !"kernel", i32 1, !"reqntidx", i32 20}
+!2 = !{ptr @test_inlined, !"kernel", i32 1, !"maxntidx", i32 4}
diff --git a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
index 3930e6d7..85f7817 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,21 +1,13 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-intr-range \
-; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
-; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-intr-range \
-; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
-; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
-; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \
-; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
-; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
-; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \
-; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
+; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
+; RANGE: call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
ret i32 %x
@@ -23,7 +15,7 @@ define ptx_device i32 @test_tid_x() {
define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
+; RANGE: call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
ret i32 %x
@@ -31,7 +23,7 @@ define ptx_device i32 @test_tid_y() {
define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
+; RANGE: call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
ret i32 %x
@@ -46,7 +38,7 @@ define ptx_device i32 @test_tid_w() {
define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
+; RANGE: call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
ret i32 %x
@@ -54,7 +46,7 @@ define ptx_device i32 @test_ntid_x() {
define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
+; RANGE: call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
ret i32 %x
@@ -62,7 +54,7 @@ define ptx_device i32 @test_ntid_y() {
define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
+; RANGE: call range(i32 1, 65) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
ret i32 %x
@@ -77,7 +69,7 @@ define ptx_device i32 @test_ntid_w() {
define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
+; RANGE: call range(i32 0, 32) i32 @llvm.nvvm.read.ptx.sreg.laneid()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
ret i32 %x
@@ -85,7 +77,7 @@ define ptx_device i32 @test_laneid() {
define ptx_device i32 @test_warpsize() {
; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
+; RANGE: call range(i32 32, 33) i32 @llvm.nvvm.read.ptx.sreg.warpsize()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
ret i32 %x
@@ -107,7 +99,7 @@ define ptx_device i32 @test_nwarpid() {
define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
+; RANGE: call range(i32 0, 65535) i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
ret i32 %x
@@ -115,7 +107,7 @@ define ptx_device i32 @test_ctaid_y() {
define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
+; RANGE: call range(i32 0, 65535) i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
ret i32 %x
@@ -123,8 +115,7 @@ define ptx_device i32 @test_ctaid_z() {
define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
-; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
-; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
+; RANGE: call range(i32 0, 2147483647) i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
ret i32 %x
@@ -139,7 +130,7 @@ define ptx_device i32 @test_ctaid_w() {
define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
+; RANGE: call range(i32 1, 65536) i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
ret i32 %x
@@ -147,7 +138,7 @@ define ptx_device i32 @test_nctaid_y() {
define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
+; RANGE: call range(i32 1, 65536) i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
ret i32 %x
@@ -155,8 +146,7 @@ define ptx_device i32 @test_nctaid_z() {
define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
-; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
-; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
+; RANGE: call range(i32 1, -2147483648) i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
ret i32 %x
@@ -327,14 +317,3 @@ declare void @llvm.nvvm.bar.sync(i32 %i)
!0 = !{i32 0, i32 19}
; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}
-; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
-; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
-; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
-; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
-; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
-; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
-; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
-; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
-; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
-; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
-; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-common.ll b/llvm/test/CodeGen/PowerPC/toc-data-common.ll
index 7747f2e..3b7ca44 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data-common.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data-common.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=lwz --check-prefix=CHECK
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=ld --check-prefix=CHECK
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-64
; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o
; RUN: llvm-objdump -t --symbol-description %t32.o | FileCheck %s --check-prefix=OBJ32
@@ -15,16 +15,28 @@
define void @set(i32 noundef %_a) {
; CHECK-LABEL: set:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 4, a2[TD](2)
-; CHECK-NEXT: la 5, a1[TD](2)
-; CHECK-NEXT: stw 3, 0(4)
-; CHECK-NEXT: [[INSTR]] 4, L..C0(2) # @a4
-; CHECK-NEXT: stw 3, 0(5)
-; CHECK-NEXT: [[INSTR]] 5, L..C1(2) # @a3
-; CHECK-NEXT: stw 3, 0(4)
-; CHECK-NEXT: stw 3, 0(5)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 4, a2[TD](2)
+; CHECK-NEXT: lwz 5, L..C0(2) # @a4
+; CHECK-NEXT: stw 3, 0(4)
+; CHECK-NEXT: la 4, a1[TD](2)
+; CHECK-NEXT: stw 3, 0(4)
+; CHECK-NEXT: lwz 4, L..C1(2) # @a3
+; CHECK-NEXT: stw 3, 0(5)
+; CHECK-NEXT: stw 3, 0(4)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: set:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 4, a2[TD](2)
+; CHECK-64-NEXT: ld 5, L..C0(2) # @a4
+; CHECK-64-NEXT: stw 3, 0(4)
+; CHECK-64-NEXT: la 4, a1[TD](2)
+; CHECK-64-NEXT: stw 3, 0(4)
+; CHECK-64-NEXT: ld 4, L..C1(2) # @a3
+; CHECK-64-NEXT: stw 3, 0(5)
+; CHECK-64-NEXT: stw 3, 0(4)
+; CHECK-64-NEXT: blr
entry:
store i32 %_a, ptr @a2, align 4
store i32 %_a, ptr @a1, align 4
@@ -35,10 +47,16 @@ ret void
define i32 @get1() {
; CHECK-LABEL: get1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a2[TD](2)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 3, a2[TD](2)
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: get1:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 3, a2[TD](2)
+; CHECK-64-NEXT: lwz 3, 0(3)
+; CHECK-64-NEXT: blr
entry:
%0 = load i32, ptr @a2, align 4
ret i32 %0
@@ -46,10 +64,16 @@ ret i32 %0
define i32 @get2() {
; CHECK-LABEL: get2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a1[TD](2)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 3, a1[TD](2)
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: get2:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 3, a1[TD](2)
+; CHECK-64-NEXT: lwz 3, 0(3)
+; CHECK-64-NEXT: blr
entry:
%0 = load i32, ptr @a1, align 4
ret i32 %0
@@ -57,10 +81,16 @@ ret i32 %0
define i32 @get3() {
; CHECK-LABEL: get3:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C0(2) # @a4
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, L..C0(2) # @a4
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: get3:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: ld 3, L..C0(2) # @a4
+; CHECK-64-NEXT: lwz 3, 0(3)
+; CHECK-64-NEXT: blr
entry:
%0 = load i32, ptr @a4, align 4
ret i32 %0
@@ -68,10 +98,16 @@ ret i32 %0
define i32 @get4() {
; CHECK-LABEL: get4:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C1(2) # @a3
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, L..C1(2) # @a3
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: get4:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: ld 3, L..C1(2) # @a3
+; CHECK-64-NEXT: lwz 3, 0(3)
+; CHECK-64-NEXT: blr
entry:
%0 = load i32, ptr @a3, align 4
ret i32 %0
@@ -79,36 +115,56 @@ ret i32 %0
define nonnull ptr @escape1() {
; CHECK-LABEL: escape1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a2[TD](2)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 3, a2[TD](2)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: escape1:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 3, a2[TD](2)
+; CHECK-64-NEXT: blr
entry:
ret ptr @a2
}
define nonnull ptr @escape2() {
; CHECK-LABEL: escape2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a1[TD](2)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 3, a1[TD](2)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: escape2:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 3, a1[TD](2)
+; CHECK-64-NEXT: blr
entry:
ret ptr @a1
}
define nonnull ptr @escape3() {
; CHECK-LABEL: escape3:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C0(2) # @a4
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, L..C0(2) # @a4
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: escape3:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: ld 3, L..C0(2) # @a4
+; CHECK-64-NEXT: blr
entry:
ret ptr @a4
}
define nonnull ptr @escape4() {
; CHECK-LABEL: escape4:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C1(2) # @a3
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, L..C1(2) # @a3
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: escape4:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: ld 3, L..C1(2) # @a3
+; CHECK-64-NEXT: blr
entry:
ret ptr @a3
}
diff --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll
index 1228665..ee1dde1 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data.ll
@@ -36,7 +36,7 @@ define dso_local void @write_int(i32 signext %in) {
ret void
}
; CHECK32: name: write_int
-; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @i, $r2
+; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc $r2, @i
; CHECK32-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
; TEST32: .write_int:
@@ -44,12 +44,12 @@ define dso_local void @write_int(i32 signext %in) {
; TEST32-NEXT: stw 3, 0(4)
; CHECK64: name: write_int
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
+; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @i
; CHECK64-NEXT: STW8 %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
; CHECK64-NOOPT: name: write_int
; CHECK64-NOOPT: %[[SUBREG:[0-9]+]]:gprc = COPY %{{[0-9]}}.sub_32
-; CHECK64-NOOPT: %[[ADDR:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
+; CHECK64-NOOPT: %[[ADDR:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @i
; CHECK64-NOOPT: STW %[[SUBREG]], 0, %[[ADDR]]
; TEST64: .write_int:
@@ -128,7 +128,7 @@ define dso_local float @read_float() {
ret float %0
}
; CHECK32: name: read_float
-; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @f, $r2
+; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc $r2, @f
; CHECK32: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
; TEST32: .read_float:
@@ -136,11 +136,11 @@ define dso_local float @read_float() {
; TEST32-NEXT: lfs 1, 0(3)
; CHECK64: name: read_float
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
+; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @f
; CHECK64: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
; CHECK64-NOOPT: name: read_float
-; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
+; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @f
; CHECK64-NOOPT: %{{[0-9]+}}:f4rc = LFS 0, %[[SCRATCH]]
; TEST64: .read_float:
@@ -217,18 +217,18 @@ define dso_local nonnull ptr @addr() {
ret ptr @i
}
; CHECK32: name: addr
-; CHECK32: %[[SCRATCH:[0-9]+]]:gprc = ADDItoc @i, $r2
+; CHECK32: %[[SCRATCH:[0-9]+]]:gprc = ADDItoc $r2, @i
; CHECK32-NEXT: $r3 = COPY %[[SCRATCH]]
; TEST32: .addr
; TEST32: la 3, i[TD](2)
; CHECK64: name: addr
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc = ADDItoc8 @i, $x2
+; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc = ADDItoc8 $x2, @i
; CHECK64-NEXT: $x3 = COPY %[[SCRATCH]]
; CHECK64-NOOPT: name: addr
-; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
+; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @i
; CHECK64-NOOPT: $x3 = COPY %[[SCRATCH]]
; TEST64: .addr
diff --git a/llvm/test/CodeGen/PowerPC/tocdata-firm-alignment.ll b/llvm/test/CodeGen/PowerPC/tocdata-firm-alignment.ll
new file mode 100644
index 0000000..c982713
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/tocdata-firm-alignment.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -passes='default<O3>' < %s | FileCheck %s
+
+target datalayout = "E-m:a-p:32:32-Fi32-i64:64-n32"
+target triple = "powerpc-ibm-aix7.2.0.0"
+
+%struct.widget = type { i8, i8, i8 }
+
+; CHECK: @global = {{.*}}constant %struct.widget { i8 4, i8 0, i8 0 }, align 8 #0
+@global = constant %struct.widget { i8 4, i8 0, i8 0 }, align 4 #0
+
+define void @baz() #1 {
+bb:
+ call void @snork(ptr @global)
+ ret void
+}
+
+define void @snork(ptr byval(%struct.widget) %arg) #1 {
+bb:
+ %load = load volatile ptr, ptr null, align 4
+ ret void
+}
+
+attributes #0 = { "toc-data" }
+attributes #1 = { "target-cpu"="pwr7" "target-features"="+altivec,+bpermd,+extdiv,+isa-v206-instructions,+vsx,-aix-shared-lib-tls-model-opt,-aix-small-local-dynamic-tls,-aix-small-local-exec-tls,-crbits,-crypto,-direct-move,-htm,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe" }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir
index 2e4a39c..46a7df4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir
@@ -68,12 +68,14 @@ body: |
; RV32: liveins: $x10
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
- ; RV32-NEXT: $x10 = COPY [[COPY]](s32)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; RV32-NEXT: $x10 = COPY [[AND]](s32)
; RV32-NEXT: PseudoRET implicit $x10
%0:_(s32) = COPY $x10
- %1:_(s64) = G_ZEXT %0(s32)
- %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64)
- $x10 = COPY %2(s32)
+ %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0(s32)
+ %4:_(s32) = G_ZEXT %2(s16)
+ $x10 = COPY %4(s32)
PseudoRET implicit $x10
...
---
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
index 8210ea2..ac74a82 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -22,7 +22,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: addi a1, a1, 16
; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill
; SPILL-O0-NEXT: # implicit-def: $v8
-; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -38,7 +38,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; SPILL-O0-NEXT: # implicit-def: $v8
-; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index 3523629..9054048 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -25,7 +25,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: addi a1, a1, 32
; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill
; SPILL-O0-NEXT: # implicit-def: $v8
-; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: addi a0, sp, 32
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -41,7 +41,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; SPILL-O0-NEXT: # implicit-def: $v8
-; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
index 14a1f084..d1f344d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
@@ -33,7 +33,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -75,7 +75,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbf_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -117,7 +117,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbf_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -159,9 +159,9 @@ define <vscale x 8 x i1> @intrinsic_vmsbf_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i1> @llvm.riscv.vmsbf.mask.nxv8i1(
@@ -201,7 +201,7 @@ define <vscale x 16 x i1> @intrinsic_vmsbf_mask_m_nxv16i1_nxv16i1(<vscale x 16 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -243,7 +243,7 @@ define <vscale x 32 x i1> @intrinsic_vmsbf_mask_m_nxv32i1_nxv32i1(<vscale x 32 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -285,7 +285,7 @@ define <vscale x 64 x i1> @intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1(<vscale x 64 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
index 05d402a..1dc52eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
@@ -33,7 +33,7 @@ define <vscale x 1 x i1> @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -75,7 +75,7 @@ define <vscale x 2 x i1> @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -117,7 +117,7 @@ define <vscale x 4 x i1> @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -159,9 +159,9 @@ define <vscale x 8 x i1> @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i1> @llvm.riscv.vmsif.mask.nxv8i1(
@@ -201,7 +201,7 @@ define <vscale x 16 x i1> @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1(<vscale x 16 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -243,7 +243,7 @@ define <vscale x 32 x i1> @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1(<vscale x 32 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -285,7 +285,7 @@ define <vscale x 64 x i1> @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1(<vscale x 64 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll
index 0c60681..b0a28e6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll
@@ -33,7 +33,7 @@ define <vscale x 1 x i1> @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -75,7 +75,7 @@ define <vscale x 2 x i1> @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -117,7 +117,7 @@ define <vscale x 4 x i1> @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -159,9 +159,9 @@ define <vscale x 8 x i1> @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i1> @llvm.riscv.vmsof.mask.nxv8i1(
@@ -201,7 +201,7 @@ define <vscale x 16 x i1> @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1(<vscale x 16 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -243,7 +243,7 @@ define <vscale x 32 x i1> @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1(<vscale x 32 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -285,7 +285,7 @@ define <vscale x 64 x i1> @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1(<vscale x 64 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll.ll b/llvm/test/CodeGen/RISCV/rvv/vsll.ll
index 1fdafd7..a089b10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsll.ll
@@ -2108,6 +2108,22 @@ entry:
ret <vscale x 1 x i8> %a
}
+define <vscale x 1 x i8> @intrinsic_vsll_1_tu_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsll_1_tu_nxv1i8_nxv1i8_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
+; CHECK-NEXT: vadd.vv v8, v9, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x i8> %1,
+ iXLen 1,
+ iXLen %2)
+
+ ret <vscale x 1 x i8> %a
+}
+
define <vscale x 1 x i8> @intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8:
; CHECK: # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll b/llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll
index d6fb70b..ec9afc7 100644
--- a/llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll
+++ b/llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll
@@ -4,16 +4,16 @@
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-; CHECK: %[[#EventTy:]] = OpTypeEvent
-; CHECK: %[[#StructEventTy:]] = OpTypeStruct %[[#EventTy]]
-; CHECK: %[[#GenPtrStructEventTy:]] = OpTypePointer Generic %[[#StructEventTy]]
-; CHECK: %[[#FunPtrStructEventTy:]] = OpTypePointer Function %[[#StructEventTy]]
-; CHECK: %[[#GenPtrEventTy:]] = OpTypePointer Generic %[[#EventTy:]]
+; CHECK-DAG: %[[#EventTy:]] = OpTypeEvent
+; CHECK-DAG: %[[#StructEventTy:]] = OpTypeStruct %[[#EventTy]]
+; CHECK-DAG: %[[#FunPtrStructEventTy:]] = OpTypePointer Function %[[#StructEventTy]]
+; CHECK-DAG: %[[#GenPtrEventTy:]] = OpTypePointer Generic %[[#EventTy]]
+; CHECK-DAG: %[[#FunPtrEventTy:]] = OpTypePointer Function %[[#EventTy]]
; CHECK: OpFunction
; CHECK: %[[#Var:]] = OpVariable %[[#FunPtrStructEventTy]] Function
-; CHECK-NEXT: %[[#AddrspacecastVar:]] = OpPtrCastToGeneric %[[#GenPtrStructEventTy]] %[[#Var]]
-; CHECK-NEXT: %[[#BitcastVar:]] = OpBitcast %[[#GenPtrEventTy]] %[[#AddrspacecastVar]]
-; CHECK-NEXT: OpGroupWaitEvents %[[#]] %[[#]] %[[#BitcastVar]]
+; CHECK-NEXT: %[[#FunEvent:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
+; CHECK-NEXT: %[[#GenEvent:]] = OpPtrCastToGeneric %[[#GenPtrEventTy]] %[[#FunEvent]]
+; CHECK-NEXT: OpGroupWaitEvents %[[#]] %[[#]] %[[#GenEvent]]
%"class.sycl::_V1::device_event" = type { target("spirv.Event") }
diff --git a/llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll b/llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll
index 7056b9c..9db4f26 100644
--- a/llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll
+++ b/llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll
@@ -3,9 +3,9 @@
; CHECK: *** IR Dump After SPIRV emit intrinsics (emit-intrinsics) ***
define spir_kernel void @test(ptr addrspace(1) %srcimg) {
-; CHECK: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
+; CHECK: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) poison)
%call1 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg)
-; CHECK-NOT: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
+; CHECK-NOT: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) poison)
%call2 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg)
ret void
; CHECK: }
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
new file mode 100644
index 0000000..96d6016
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
@@ -0,0 +1,36 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-SPIRV-DAG: %[[#LongTy:]] = OpTypeInt 64 0
+; CHECK-SPIRV-DAG: %[[#IntTy:]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[#EventTy:]] = OpTypeEvent
+; CHECK-SPIRV-DAG: %[[#Scope:]] = OpConstant %[[#IntTy]] 2
+; CHECK-SPIRV-DAG: %[[#Num:]] = OpConstant %[[#LongTy]] 123
+; CHECK-SPIRV-DAG: %[[#Null:]] = OpConstantNull
+; CHECK-SPIRV-DAG: %[[#Stride:]] = OpConstant %[[#LongTy]] 1
+; CHECK-SPIRV-DAG: %[[#GenPtrEventTy:]] = OpTypePointer Generic %[[#EventTy]]
+; CHECK-SPIRV-DAG: %[[#FunPtrEventTy:]] = OpTypePointer Function %[[#EventTy]]
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#Var:]] = OpVariable %[[#]] Function
+; CHECK-SPIRV: %[[#ResEvent:]] = OpGroupAsyncCopy %[[#EventTy]] %[[#Scope]] %[[#Null]] %[[#Null]] %[[#Num]] %[[#Stride]] %[[#Null]]
+; CHECK-SPIRV: %[[#VarPtrEvent:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
+; CHECK-SPIRV: OpStore %[[#VarPtrEvent]] %[[#ResEvent]]
+; CHECK-SPIRV: %[[#VarPtrEvent2:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
+; CHECK-SPIRV: %[[#PtrEventGen:]] = OpPtrCastToGeneric %[[#]] %[[#VarPtrEvent2]]
+; CHECK-SPIRV: OpGroupWaitEvents %[[#Scope]] %[[#Num]] %[[#PtrEventGen]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define spir_kernel void @foo() {
+ %event = alloca ptr, align 8
+ %call = call spir_func ptr @_Z29async_work_group_strided_copyPU3AS3hPU3AS1Khmm9ocl_event(ptr null, ptr null, i64 123, i64 1, ptr null)
+ store ptr %call, ptr %event, align 8
+ %event.ascast = addrspacecast ptr %event to ptr addrspace(4)
+ call spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i64 123, ptr addrspace(4) %event.ascast)
+ ret void
+}
+
+declare spir_func ptr @_Z29async_work_group_strided_copyPU3AS3hPU3AS1Khmm9ocl_event(ptr, ptr, i64, i64, ptr)
+declare spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i64, ptr addrspace(4))
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll b/llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll
index 824ca1b2..6f61aba 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll
@@ -1,5 +1,5 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
-; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; CHECK-SPIRV: %[[#IMAGE_TYPE:]] = OpTypeImage
; CHECK-SPIRV: %[[#IMAGE_ARG:]] = OpFunctionParameter %[[#IMAGE_TYPE]]
diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll
index f9b3757..6392452 100644
--- a/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll
@@ -20,50 +20,35 @@ define void @convert_vptblock(ptr %pchTarget, i16 signext %iTargetStride, ptr %p
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: ldrd r4, r5, [sp, #88]
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: cmp.w r10, #8
-; CHECK-NEXT: mov.w r0, #1
-; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: mov.w r11, #0
-; CHECK-NEXT: it ge
-; CHECK-NEXT: movge r3, #8
; CHECK-NEXT: vidup.u16 q0, r8, #4
-; CHECK-NEXT: sub.w r3, r10, r3
; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: adds r3, #7
; CHECK-NEXT: vmov.i16 q2, #0x100
; CHECK-NEXT: vmov.i16 q3, #0xff
-; CHECK-NEXT: add.w r9, r0, r3, lsr #3
; CHECK-NEXT: .LBB0_2: @ %for.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
-; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r6, r8
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: dls lr, r9
+; CHECK-NEXT: dlstp.16 lr, r10
; CHECK-NEXT: .LBB0_3: @ %do.body
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: vctp.16 r3
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vldrbt.u16 q5, [r2, q4]
+; CHECK-NEXT: vldrb.u16 q5, [r2, q4]
; CHECK-NEXT: vmul.i16 q4, q5, r5
; CHECK-NEXT: vshr.u16 q4, q4, #8
; CHECK-NEXT: vsub.i16 q5, q2, q4
; CHECK-NEXT: vpt.i16 eq, q4, q3
; CHECK-NEXT: vmovt q5, q1
-; CHECK-NEXT: vctp.16 r3
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vldrbt.u16 q6, [r0]
+; CHECK-NEXT: vldrb.u16 q6, [r0]
; CHECK-NEXT: vsub.i16 q4, q2, q5
-; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: vmul.i16 q5, q5, q6
; CHECK-NEXT: vmla.i16 q5, q4, r4
; CHECK-NEXT: vshr.u16 q4, q5, #8
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrbt.16 q4, [r0], #8
+; CHECK-NEXT: vstrb.16 q4, [r0], #8
; CHECK-NEXT: vidup.u16 q4, r6, #4
-; CHECK-NEXT: le lr, .LBB0_3
+; CHECK-NEXT: letp lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %do.end
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: add.w r0, r11, #1
diff --git a/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
new file mode 100644
index 0000000..80f3db0
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
@@ -0,0 +1,484 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+; REQUIRES: webassembly-registered-target
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_libcS.vararg = type <{ i32, ptr }>
+; CHECK: %libcS_i32.vararg = type <{ ptr, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+; CHECK: %fptr_libcS.vararg = type <{ ptr }>
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+@vararg_ptr = hidden global ptr @vararg, align 4
+
+define hidden void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va.addr = alloca ptr, align 4
+; CHECK-NEXT: %cp = alloca ptr, align 4
+; CHECK-NEXT: store ptr %va, ptr %va.addr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %cp, ptr %va.addr, i32 4, i1 false)
+; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %va, ptr %va.addr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr)
+ %0 = load ptr, ptr %cp, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+define hidden void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+; CHECK-NEXT: store ptr %varargs, ptr %s, align 4
+; CHECK-NEXT: %0 = load ptr, ptr %s, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ %0 = load ptr, ptr %s, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define hidden void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca ptr, align 4
+; CHECK-NEXT: %s1 = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+; CHECK-NEXT: store ptr %varargs, ptr %s0, align 4
+; CHECK-NEXT: %0 = load ptr, ptr %s0, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: store ptr %varargs, ptr %s1, align 4
+; CHECK-NEXT: %1 = load ptr, ptr %s1, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 4
+ %s1 = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ %0 = load ptr, ptr %s0, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ %1 = load ptr, ptr %s1, align 4
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+ ret void
+}
+
+define hidden void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare void @vararg(...)
+
+define hidden void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+define hidden void @single_v4f32(<4 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v8f32(<8 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v16f32(<16 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v32f32(<32 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x)
+ ret void
+}
+
+define hidden void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+define hidden void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %y, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define hidden void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 36, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 36, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 68, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 68, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 256, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 256, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 132, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 132, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: call void %0(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_libcS.vararg, align 16
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4
+; CHECK-NEXT: call void %0(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(ptr noundef nonnull byval(%struct.libcS) align 8 %x)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 761a754..67388b6 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -11788,27 +11788,35 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: minnum_intrinsic_v4f32:
; NO-SIMD128: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8
-; NO-SIMD128-NEXT: f32.store 12($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: call $push0=, fminf, $1, $5
-; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
-; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $2, $6
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -11830,26 +11838,26 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: minnum_nsz_intrinsic_v4f32:
; NO-SIMD128: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8
+; NO-SIMD128-NEXT: f32.min $push0=, $4, $8
; NO-SIMD128-NEXT: f32.store 12($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7
+; NO-SIMD128-NEXT: f32.min $push1=, $3, $7
; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6
+; NO-SIMD128-NEXT: f32.min $push2=, $2, $6
; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5
+; NO-SIMD128-NEXT: f32.min $push3=, $1, $5
; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: minnum_nsz_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: call $push0=, fminf, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.min $push0=, $1, $5
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
-; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.min $push1=, $2, $6
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.min $push3=, $4, $8
; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
@@ -11875,16 +11883,16 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0
+; NO-SIMD128-NEXT: f32.min $push1=, $4, $pop0
; NO-SIMD128-NEXT: f32.store 12($0), $pop1
; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7
+; NO-SIMD128-NEXT: f32.min $push2=, $3, $pop7
; NO-SIMD128-NEXT: f32.store 8($0), $pop2
; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop6
+; NO-SIMD128-NEXT: f32.min $push3=, $2, $pop6
; NO-SIMD128-NEXT: f32.store 4($0), $pop3
; NO-SIMD128-NEXT: f32.const $push5=, -0x1p0
-; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop5
+; NO-SIMD128-NEXT: f32.min $push4=, $1, $pop5
; NO-SIMD128-NEXT: f32.store 0($0), $pop4
; NO-SIMD128-NEXT: return
;
@@ -11892,16 +11900,16 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128-FAST: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop7
+; NO-SIMD128-FAST-NEXT: f32.min $push2=, $2, $pop7
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2
; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop6
+; NO-SIMD128-FAST-NEXT: f32.min $push3=, $3, $pop6
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3
; NO-SIMD128-FAST-NEXT: f32.const $push5=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $4, $pop5
+; NO-SIMD128-FAST-NEXT: f32.min $push4=, $4, $pop5
; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop4
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float -1.0, float -1.0, float -1.0>)
@@ -11979,34 +11987,38 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0
+; NO-SIMD128-NEXT: f32.min $push1=, $4, $pop0
; NO-SIMD128-NEXT: f32.store 12($0), $pop1
-; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7
+; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-NEXT: f32.min $push2=, $3, $pop9
; NO-SIMD128-NEXT: f32.store 8($0), $pop2
; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0
-; NO-SIMD128-NEXT: call $push4=, fminf, $2, $pop3
-; NO-SIMD128-NEXT: f32.store 4($0), $pop4
-; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-NEXT: call $push5=, fminf, $1, $pop6
-; NO-SIMD128-NEXT: f32.store 0($0), $pop5
+; NO-SIMD128-NEXT: f32.const $push8=, 0x0p0
+; NO-SIMD128-NEXT: f32.lt $push4=, $2, $pop8
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $pop3, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-NEXT: f32.min $push6=, $1, $pop7
+; NO-SIMD128-NEXT: f32.store 0($0), $pop6
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic:
; NO-SIMD128-FAST: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $2, $pop2
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-FAST-NEXT: f32.const $push3=, 0x0p0
+; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x0p0
+; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $2, $pop8
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $2, $pop3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop5
; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
-; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $pop6
-; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop6
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 0.0, float -1.0, float -1.0>)
ret <4 x float> %a
@@ -12126,27 +12138,35 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32:
; NO-SIMD128: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8
-; NO-SIMD128-NEXT: f32.store 12($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop3
+; NO-SIMD128-NEXT: f32.gt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.gt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.gt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.gt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: call $push0=, fmaxf, $1, $5
-; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
-; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $2, $6
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.gt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.gt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.gt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -12168,26 +12188,26 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: maxnum_nsz_intrinsic_v4f32:
; NO-SIMD128: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8
+; NO-SIMD128-NEXT: f32.max $push0=, $4, $8
; NO-SIMD128-NEXT: f32.store 12($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7
+; NO-SIMD128-NEXT: f32.max $push1=, $3, $7
; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6
+; NO-SIMD128-NEXT: f32.max $push2=, $2, $6
; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5
+; NO-SIMD128-NEXT: f32.max $push3=, $1, $5
; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_nsz_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: call $push0=, fmaxf, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.max $push0=, $1, $5
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
-; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.max $push1=, $2, $6
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.max $push3=, $4, $8
; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
@@ -12265,34 +12285,38 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0
+; NO-SIMD128-NEXT: f32.max $push1=, $4, $pop0
; NO-SIMD128-NEXT: f32.store 12($0), $pop1
-; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7
+; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-NEXT: f32.max $push2=, $3, $pop9
; NO-SIMD128-NEXT: f32.store 8($0), $pop2
; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0
-; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3
-; NO-SIMD128-NEXT: f32.store 4($0), $pop4
-; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6
-; NO-SIMD128-NEXT: f32.store 0($0), $pop5
+; NO-SIMD128-NEXT: f32.const $push8=, 0x0p0
+; NO-SIMD128-NEXT: f32.gt $push4=, $2, $pop8
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $pop3, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-NEXT: f32.max $push6=, $1, $pop7
+; NO-SIMD128-NEXT: f32.store 0($0), $pop6
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-FAST-NEXT: f32.const $push3=, 0x0p0
+; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x0p0
+; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $2, $pop8
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $2, $pop3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop5
; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
-; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6
-; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop6
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 0.0, float -1.0, float -1.0>)
ret <4 x float> %a
@@ -12317,16 +12341,16 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0
+; NO-SIMD128-NEXT: f32.max $push1=, $4, $pop0
; NO-SIMD128-NEXT: f32.store 12($0), $pop1
; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7
+; NO-SIMD128-NEXT: f32.max $push2=, $3, $pop7
; NO-SIMD128-NEXT: f32.store 8($0), $pop2
; NO-SIMD128-NEXT: f32.const $push3=, 0x1p0
-; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3
+; NO-SIMD128-NEXT: f32.max $push4=, $2, $pop3
; NO-SIMD128-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6
+; NO-SIMD128-NEXT: f32.max $push5=, $1, $pop6
; NO-SIMD128-NEXT: f32.store 0($0), $pop5
; NO-SIMD128-NEXT: return
;
@@ -12334,16 +12358,16 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128-FAST: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x1p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.max $push3=, $2, $pop2
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7
+; NO-SIMD128-FAST-NEXT: f32.max $push4=, $3, $pop7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $pop6
; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 1.0, float -1.0, float -1.0>)
diff --git a/llvm/test/CodeGen/WebAssembly/vararg-frame.ll b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll
new file mode 100644
index 0000000..5c76040
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll
@@ -0,0 +1,526 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+; REQUIRES: webassembly-registered-target
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+; Function Attrs: nounwind
+define void @pass_s0() {
+; CHECK-LABEL: pass_s0:
+; CHECK: .functype pass_s0 () -> ()
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: call sink
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink()
+ ret void
+}
+
+declare void @sink(...)
+
+; Function Attrs: nounwind
+define void @pass_s1(i8 %x.coerce) {
+; CHECK-LABEL: pass_s1:
+; CHECK: .functype pass_s1 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i8 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s2(i16 %x.coerce) {
+; CHECK-LABEL: pass_s2:
+; CHECK: .functype pass_s2 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i16 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s3(i32 %x.coerce) {
+; CHECK-LABEL: pass_s3:
+; CHECK: .functype pass_s3 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s4(i64 %x.coerce) {
+; CHECK-LABEL: pass_s4:
+; CHECK: .functype pass_s4 (i64) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i64 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s5(<4 x i32> noundef %x) {
+; CHECK-LABEL: pass_s5:
+; CHECK: .functype pass_s5 (i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 12
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(<4 x i32> noundef %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s0(i32 noundef %i) {
+; CHECK-LABEL: pass_int_s0:
+; CHECK: .functype pass_int_s0 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s1(i32 noundef %i, i8 %x.coerce) {
+; CHECK-LABEL: pass_int_s1:
+; CHECK: .functype pass_int_s1 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i8 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s2(i32 noundef %i, i16 %x.coerce) {
+; CHECK-LABEL: pass_int_s2:
+; CHECK: .functype pass_int_s2 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i16 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s3(i32 noundef %i, i32 %x.coerce) {
+; CHECK-LABEL: pass_int_s3:
+; CHECK: .functype pass_int_s3 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i32 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s4(i32 noundef %i, i64 %x.coerce) {
+; CHECK-LABEL: pass_int_s4:
+; CHECK: .functype pass_int_s4 (i32, i64) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.store 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i64 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s5(i32 noundef %i, <4 x i32> noundef %x) {
+; CHECK-LABEL: pass_int_s5:
+; CHECK: .functype pass_int_s5 (i32, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 20
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, <4 x i32> noundef %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_asc(i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5) {
+; CHECK-LABEL: pass_asc:
+; CHECK: .functype pass_asc (i32, i32, i32, i64, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 44
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 40
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 36
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_dsc(<4 x i32> noundef %x0, i64 %x1.coerce, i32 %x2.coerce, i16 %x3.coerce, i8 %x4.coerce) {
+; CHECK-LABEL: pass_dsc:
+; CHECK: .functype pass_dsc (i32, i32, i32, i32, i64, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 12
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(<4 x i32> noundef %x0, i64 %x1.coerce, i32 %x2.coerce, i16 %x3.coerce, i8 %x4.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_multiple(i32 noundef %i, i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5) {
+; CHECK-LABEL: pass_multiple:
+; CHECK: .functype pass_multiple (i32, i32, i32, i32, i64, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 9
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.store 40
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 36
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 32
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 20
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i16 %x2.coerce, i64 %x4.coerce)
+ tail call void (...) @sink(i32 noundef %i, i8 %x1.coerce, i32 %x3.coerce, <4 x i32> noundef %x5)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
index 1782e52..55b86ca 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
@@ -28,35 +28,17 @@ define half @test_fminimum(half %x, half %y) {
define <8 x half> @test_fminimum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fminimum_scalarize:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vminsh %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
-; CHECK-NEXT: vshufps {{.*#+}} xmm4 = xmm0[3,3,3,3]
-; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
-; CHECK-NEXT: vshufpd {{.*#+}} xmm5 = xmm0[1,0]
-; CHECK-NEXT: vminsh %xmm4, %xmm5, %xmm4
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
-; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm4
-; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; CHECK-NEXT: vminsh %xmm4, %xmm5, %xmm4
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vpsrld $16, %xmm1, %xmm1
-; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0
-; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT: vcmpltph %xmm1, %xmm0, %k1
+; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; CHECK-NEXT: vpcmpeqw %xmm3, %xmm0, %k1
+; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vpcmpeqw %xmm3, %xmm1, %k1
+; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
@@ -134,35 +116,16 @@ define half @test_fmaximum(half %x, half %y) {
define <8 x half> @test_fmaximum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_scalarize:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vmaxsh %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
-; CHECK-NEXT: vshufps {{.*#+}} xmm4 = xmm0[3,3,3,3]
-; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
-; CHECK-NEXT: vshufpd {{.*#+}} xmm5 = xmm0[1,0]
-; CHECK-NEXT: vmaxsh %xmm4, %xmm5, %xmm4
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
-; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm4
-; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; CHECK-NEXT: vmaxsh %xmm4, %xmm5, %xmm4
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vpsrld $16, %xmm1, %xmm1
-; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0
-; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT: vcmpltph %xmm0, %xmm1, %k1
+; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
+; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
+; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
+; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/llvm.tan.ll
new file mode 100644
index 0000000..24b3003
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.tan.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_tanf16(half %a) nounwind {
+; CHECK-LABEL: use_tanf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.tan.f16(half %a)
+ ret half %x
+}
+
+define float @use_tanf32(float %a) nounwind {
+; CHECK-LABEL: use_tanf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanf@PLT # TAILCALL
+ %x = call float @llvm.tan.f32(float %a)
+ ret float %x
+}
+
+define double @use_tanf64(double %a) nounwind {
+; CHECK-LABEL: use_tanf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tan@PLT # TAILCALL
+ %x = call double @llvm.tan.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_tanf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_tanf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq tanl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.tan.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_tanfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_tanfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanf128@PLT # TAILCALL
+ %x = call fp128 @llvm.tan.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_tanppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq tanl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.tan.f16(half)
+declare float @llvm.tan.f32(float)
+declare double @llvm.tan.f64(double)
+declare x86_fp80 @llvm.tan.f80(x86_fp80)
+declare fp128 @llvm.tan.f128(fp128)
+declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/vec-libcalls.ll b/llvm/test/CodeGen/X86/vec-libcalls.ll
index 3a13154..6857101 100644
--- a/llvm/test/CodeGen/X86/vec-libcalls.ll
+++ b/llvm/test/CodeGen/X86/vec-libcalls.ll
@@ -17,6 +17,14 @@ declare <5 x float> @llvm.sin.v5f32(<5 x float>)
declare <6 x float> @llvm.sin.v6f32(<6 x float>)
declare <3 x double> @llvm.sin.v3f64(<3 x double>)
+declare <1 x float> @llvm.tan.v1f32(<1 x float>)
+declare <2 x float> @llvm.tan.v2f32(<2 x float>)
+declare <3 x float> @llvm.tan.v3f32(<3 x float>)
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
+declare <5 x float> @llvm.tan.v5f32(<5 x float>)
+declare <6 x float> @llvm.tan.v6f32(<6 x float>)
+declare <3 x double> @llvm.tan.v3f64(<3 x double>)
+
; Verify that all of the potential libcall candidates are handled.
; Some of these have custom lowering, so those cases won't have
; libcalls.
@@ -230,6 +238,200 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
ret <3 x double> %r
}
+define <1 x float> @tan_v1f32(<1 x float> %x) nounwind {
+; CHECK-LABEL: tan_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x)
+ ret <1 x float> %r
+}
+
+define <2 x float> @tan_v2f32(<2 x float> %x) nounwind {
+; CHECK-LABEL: tan_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x)
+ ret <2 x float> %r
+}
+
+define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: tan_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x)
+ ret <3 x float> %r
+}
+
+define <4 x float> @tan_v4f32(<4 x float> %x) nounwind {
+; CHECK-LABEL: tan_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
+ ret <4 x float> %r
+}
+
+define <5 x float> @tan_v5f32(<5 x float> %x) nounwind {
+; CHECK-LABEL: tan_v5f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+ %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x)
+ ret <5 x float> %r
+}
+
+define <6 x float> @tan_v6f32(<6 x float> %x) nounwind {
+; CHECK-LABEL: tan_v6f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+ %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x)
+ ret <6 x float> %r
+}
+
+define <3 x double> @tan_v3f64(<3 x double> %x) nounwind {
+; CHECK-LABEL: tan_v3f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tan@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tan@PLT
+; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tan@PLT
+; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+ %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x)
+ ret <3 x double> %r
+}
+
define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: fabs_v2f32:
; CHECK: # %bb.0:
diff --git a/llvm/test/DebugInfo/X86/sdag-order.ll b/llvm/test/DebugInfo/X86/sdag-order.ll
new file mode 100644
index 0000000..f959a80
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/sdag-order.ll
@@ -0,0 +1,46 @@
+; RUN: llc %s --stop-after=finalize-isel -o - | FileCheck %s
+
+;; Check the DBG_VALUE which is salvaged from the dbg.value using an otherwise
+;; unused value is emitted at the correct position in the function.
+;; Prior (-) to patching (+), these DBG_VALUEs would sink to the bottom of the
+;; function:
+;; │ bb.1.if.then:
+;; │- $rax = COPY %1
+;; │ DBG_VALUE 0, $noreg, !9, !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value)
+;; │+ $rax = COPY %1
+;; │ RET 0, $rax
+
+; CHECK: bb.1.if.then:
+; CHECK-NEXT: DBG_VALUE 0, $noreg, ![[#]], !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @badger(ptr sret(i64) %sret) !dbg !5 {
+entry:
+ %f.i = getelementptr i8, ptr null, i64 4
+ br label %if.then
+
+if.then: ; preds = %entry
+ tail call void @llvm.dbg.value(metadata ptr %f.i, metadata !9, metadata !DIExpression()), !dbg !11
+ ret void
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!2, !3}
+!llvm.module.flags = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "test.ll", directory: "/")
+!2 = !{i32 3}
+!3 = !{i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "_ZNK1d1gEv", linkageName: "_ZNK1d1gEv", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !{!9}
+!9 = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !10)
+!10 = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned)
+!11 = !DILocation(line: 5, column: 1, scope: !5)
diff --git a/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test b/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test
index 5141ff6..9e46570 100644
--- a/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test
+++ b/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test
@@ -1,4 +1,3 @@
-# REQUIRES: shell
# Ensure that no realpath assumptions are made about .gnu_debuglink paths.
# Copy inputs to some other location with arbitrary names, with the original
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll b/llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll
index dad5f8e..8610645 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-with-calls -hwasan-use-stack-safety=1 -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=SAFETY,CHECK
+; RUN: opt -pass-remarks-output=%t.pass-remarks -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-with-calls -hwasan-use-stack-safety=1 -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=SAFETY,CHECK
+; RUN: cat %t.pass-remarks | FileCheck %s --check-prefixes=SAFETY-REMARKS
; RUN: opt -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-with-calls -hwasan-use-stack-safety=0 -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=NOSAFETY,CHECK
; RUN: opt -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-with-calls -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=SAFETY,CHECK
; RUN: opt -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-stack=0 -hwasan-instrument-with-calls -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=NOSTACK,CHECK
@@ -20,6 +21,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_simple
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
store volatile i8 0, ptr %buf.sroa.0, align 4, !tbaa !8
@@ -37,6 +39,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_cmpxchg
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
%0 = cmpxchg ptr %buf.sroa.0, i8 1, i8 2 monotonic monotonic, align 4
@@ -54,6 +57,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_atomicrwm
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
%0 = atomicrmw add ptr %buf.sroa.0, i8 1 monotonic, align 4
@@ -71,6 +75,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_use
%buf.sroa.0 = alloca i8, align 4
call void @use(ptr nonnull %buf.sroa.0)
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
@@ -89,6 +94,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range
%buf.sroa.0 = alloca [10 x i8], align 4
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
store volatile i8 0, ptr %buf.sroa.0, align 4, !tbaa !8
@@ -106,6 +112,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range2
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
@@ -123,6 +130,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_memset
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memset
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range3
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memset.p0.i32(ptr %ptr, i8 0, i32 1, i1 true)
@@ -138,6 +146,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range4
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memmove.p0.p0.i32(ptr %ptr, ptr %ptr, i32 1, i1 true)
@@ -153,6 +162,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range5
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
%buf.sroa.1 = alloca [10 x i8], align 4
@@ -171,6 +181,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 10
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
@@ -188,6 +199,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range2
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 10
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
@@ -205,6 +217,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_memset
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memset
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range3
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memset.p0.i32(ptr %ptr, i8 0, i32 2, i1 true)
@@ -220,6 +233,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range4
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memmove.p0.p0.i32(ptr %ptr, ptr %ptr, i32 2, i1 true)
@@ -235,6 +249,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range5
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
%buf.sroa.1 = alloca [10 x i8], align 4
@@ -256,6 +271,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range6
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 10
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
@@ -275,6 +291,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_potentially_out_of_range
%buf.sroa.0 = alloca [10 x i8], align 4
%off = call i32 @getoffset()
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 %off
@@ -293,6 +310,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_potentially_out_of_range2
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memmove.p0.p0.i32(ptr %ptr, ptr %a, i32 1, i1 true)
@@ -309,6 +327,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_unclear
%buf.sroa.0 = alloca i8, align 4
%ptr = call ptr @getptr(ptr %buf.sroa.0)
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %ptr)
@@ -326,6 +345,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_select
%x = call ptr @getptr(ptr %a)
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
@@ -346,6 +366,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_retptr
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
%ptr = call ptr @retptr(ptr %buf.sroa.0)
diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
index 58b7847..d3f82a2 100644
--- a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
@@ -1,5 +1,6 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1150 %s | FileCheck --check-prefix=GFX1150 %s
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1151 %s | FileCheck --check-prefix=GFX1150 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1152 %s | FileCheck --check-prefix=GFX1150 %s
//
// Subtargets allow src1 of VOP3 DPP instructions to be SGPR or inlinable
diff --git a/llvm/test/MC/RISCV/relocations.s b/llvm/test/MC/RISCV/relocations.s
index 3cad3d4..f5f6417 100644
--- a/llvm/test/MC/RISCV/relocations.s
+++ b/llvm/test/MC/RISCV/relocations.s
@@ -185,7 +185,7 @@ auipc a0, %tlsdesc_hi(a_symbol)
lw a1, %tlsdesc_load_lo(.L5)(a0)
# RELOC: R_RISCV_TLSDESC_LOAD_LO12
-# INSTR: a1, %tlsdesc_load_lo(.L5)(a0)
+# INSTR: lw a1, %tlsdesc_load_lo(.L5)(a0)
# FIXUP: fixup A - offset: 0, value: %tlsdesc_load_lo(.L5), kind: fixup_riscv_tlsdesc_load_lo12
addi a0, a0, %tlsdesc_add_lo(.L5)
diff --git a/llvm/test/MC/WebAssembly/reloc-pic64.s b/llvm/test/MC/WebAssembly/reloc-pic64.s
index 0f2ebba..4c5ec4f 100644
--- a/llvm/test/MC/WebAssembly/reloc-pic64.s
+++ b/llvm/test/MC/WebAssembly/reloc-pic64.s
@@ -93,6 +93,7 @@ hidden_func:
# CHECK-NEXT: Index: 0
# CHECK-NEXT: ElemType: FUNCREF
# CHECK-NEXT: Limits:
+# CHECK-NEXT: Flags: [ IS_64 ]
# CHECK-NEXT: Minimum: 0x1
# CHECK-NEXT: - Module: GOT.mem
# CHECK-NEXT: Field: default_data
@@ -109,7 +110,7 @@ hidden_func:
# CHECK-NEXT: - Type: ELEM
# CHECK-NEXT: Segments:
# CHECK-NEXT: Offset:
-# CHECK-NEXT: Opcode: I32_CONST
+# CHECK-NEXT: Opcode: I64_CONST
# CHECK-NEXT: Value: 1
# CHECK-NEXT: Functions: [ 5 ]
# CHECK-NEXT: - Type: DATACOUNT
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 7f36795..7512edd 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -230,6 +230,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1151 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1151 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1151 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1151 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1152/' %s | yaml2obj -o %t.o.AMDGCN_GFX1152
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1152 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1152 %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX1152 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1152 %s
+
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1200/' %s | yaml2obj -o %t.o.AMDGCN_GFX1200
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1200 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1200 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1200 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1200 %s
@@ -450,6 +454,9 @@
# ELF-AMDGCN-GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
# YAML-AMDGCN-GFX1151: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1151 ]
+# ELF-AMDGCN-GFX1152: EF_AMDGPU_MACH_AMDGCN_GFX1152 (0x55)
+# YAML-AMDGCN-GFX1152: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1152 ]
+
# ELF-AMDGCN-GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
# YAML-AMDGCN-GFX1200: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1200 ]
diff --git a/llvm/test/Other/can-execute.txt b/llvm/test/Other/can-execute.txt
index 46791cb..37626e7 100644
--- a/llvm/test/Other/can-execute.txt
+++ b/llvm/test/Other/can-execute.txt
@@ -1,5 +1,4 @@
REQUIRES: can-execute
-REQUIRES: shell
This tests that we abstract two peculiarities of unix in can_execute:
diff --git a/llvm/test/Other/lit-unicode.txt b/llvm/test/Other/lit-unicode.txt
index 2f400014..b375fc50 100644
--- a/llvm/test/Other/lit-unicode.txt
+++ b/llvm/test/Other/lit-unicode.txt
@@ -1,5 +1,4 @@
FIXME: See if we can fix this in lit by using Unicode strings.
-REQUIRES: shell
RUN: echo "ようこそ" | FileCheck %s
CHECK: {{^}}ようこそ{{$}}
diff --git a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
new file mode 100644
index 0000000..2f0b51c
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
@@ -0,0 +1,443 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p constraint-elimination -S %s | FileCheck %s
+
+declare void @llvm.assume(i1)
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_known(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_known(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1235
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_start_value(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_start_value(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1235, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 1235, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1235
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_known_due_to_precond_on_start_value(ptr %s, i32 %start) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_known_due_to_precond_on_start_value(
+; CHECK-SAME: ptr [[S:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[PRE_C:%.*]] = icmp ule i32 [[START]], 1234
+; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_C]])
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ %pre.c = icmp ule i32 %start, 1234
+ call void @llvm.assume(i1 %pre.c)
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1235
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_precond_on_start_value(ptr %s, i32 %start) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_precond_on_start_value(
+; CHECK-SAME: ptr [[S:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[PRE_C:%.*]] = icmp ule i32 [[START]], 1236
+; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_C]])
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1236
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ %pre.c = icmp ule i32 %start, 1236
+ call void @llvm.assume(i1 %pre.c)
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1236
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_missing_precond(ptr %s, i32 %start) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_missing_precond(
+; CHECK-SAME: ptr [[S:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1236
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1236
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_exit_with_out_loop_preds_const_compare_not_known(ptr %s, i1 %pre.c, i32 %x) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_exit_with_out_loop_preds_const_compare_not_known(
+; CHECK-SAME: ptr [[S:%.*]], i1 [[PRE_C:%.*]], i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[PRE_C]], label %[[LOOP_HEADER:.*]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[X]], %[[ENTRY]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[IV]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[U:%.*]] = icmp ult i32 [[P]], 1235
+; CHECK-NEXT: ret i1 [[U]]
+;
+entry:
+ br i1 %pre.c, label %loop.header, label %exit
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %p = phi i32 [ %x, %entry ], [ %iv, %loop.header ], [ %iv, %loop.latch ]
+ %u = icmp ult i32 %p, 1235
+ ret i1 %u
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_successors_swapped(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_successors_swapped(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[U:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[U]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %loop.latch, label %exit
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %u = icmp ult i32 %iv, 1235
+ ret i1 %u
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[U:%.*]] = icmp ult i32 [[IV]], 1234
+; CHECK-NEXT: ret i1 [[U]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %u = icmp ult i32 %iv, 1234
+ ret i1 %u
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_var_compare_known(ptr %s, i32 %N) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_var_compare_known(
+; CHECK-SAME: ptr [[S:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ule i32 [[IV]], [[N]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, %N
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ule i32 %iv, %N
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_ne_same_unique_exit_const_compare_known(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_ne_same_unique_exit_const_compare_known(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ne i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp ne i32 %iv, 1234
+ br i1 %exitcond.not, label %loop.latch, label %exit
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1235
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_ne_same_unique_exit_successors_swapped(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_ne_same_unique_exit_successors_swapped(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ne i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[U:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[U]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp ne i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %u = icmp ult i32 %iv, 1235
+ ret i1 %u
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
new file mode 100644
index 0000000..f7e21cd
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; Split variadic functions into two functions:
+; - one equivalent to the original, same symbol etc
+; - one implementing the contents of the original but taking a valist
+; IR here is applicable to any target that uses a ptr for valist
+;
+; Defines a function with each linkage (in the order of the llvm documentation).
+; If split applies it does the same transform to each.
+; Whether split applies depends on whether the ABI is being changed or not - e.g. a weak
+; function is not normally useful to split as the contents cannot be called from elsewhere.
+; If the ABI is being rewritten then the function is still converted. Call sites tested elsewhere.
+
+; Update test checks doesn't emit checks for declares
+
+declare void @sink_valist(ptr)
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+
+declare void @decl_simple(...)
+define void @defn_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for private
+define private void @defn_private_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_private_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_private_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_private_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for internal
+define internal void @defn_internal_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_internal_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_internal_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_internal_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for available_externally
+define available_externally void @available_externally_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@available_externally_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@available_externally_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for linkonce
+define linkonce void @defn_linkonce_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_linkonce_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_linkonce_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for weak
+define weak void @defn_weak_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_weak_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_weak_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; common is not applicable to functions
+; appending is not applicable to functions
+
+declare extern_weak void @decl_extern_weak_simple(...)
+; no define for extern_weak
+
+; no declare for linkonce_odr
+define linkonce_odr void @defn_linkonce_odr_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for weak_odr
+define weak_odr void @defn_weak_odr_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_weak_odr_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_weak_odr_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+declare external void @decl_external_simple(...)
+define external void @defn_external_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_external_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_external_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_external_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
new file mode 100644
index 0000000..9a86540
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
@@ -0,0 +1,214 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; Examples are variadic functions that return the first or the second of an int and a double
+; Split the functions into an internal equivalent that takes a va_list and a ABI preserving wrapper
+
+define i32 @variadic_int_double_get_firstz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: %1 = call i32 @variadic_int_double_get_firstz.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret i32 %1
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: store ptr %argp.next, ptr %va, align 4
+; ABI-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; ABI-NEXT: ret i32 %0
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.va_start.p0(ptr nonnull %va)
+ %argp.cur = load ptr, ptr %va, align 4
+ %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+ store ptr %argp.next, ptr %va, align 4
+ %0 = load i32, ptr %argp.cur, align 4
+ call void @llvm.va_end.p0(ptr %va)
+ ret i32 %0
+}
+
+; CHECK-LABEL: define i32 @variadic_int_double_get_firstz(...) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va_list = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; CHECK-NEXT: %0 = tail call i32 @variadic_int_double_get_firstz.valist(ptr %va_list)
+; CHECK-NEXT: ret i32 %0
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal i32 @variadic_int_double_get_firstz.valist(ptr noalias %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: store ptr %varargs, ptr %va, align 4
+; CHECK-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; CHECK-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; CHECK-NEXT: store ptr %argp.next, ptr %va, align 4
+; CHECK-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; CHECK-NEXT: ret i32 %0
+; CHECK-NEXT: }
+
+define double @variadic_int_double_get_secondz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: %1 = call double @variadic_int_double_get_secondz.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret double %1
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; ABI-NEXT: store ptr %argp.next2, ptr %va, align 4
+; ABI-NEXT: %0 = load double, ptr %argp.next, align 4
+; ABI-NEXT: ret double %0
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.va_start.p0(ptr nonnull %va)
+ %argp.cur = load ptr, ptr %va, align 4
+ %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+ %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+ store ptr %argp.next2, ptr %va, align 4
+ %0 = load double, ptr %argp.next, align 4
+ call void @llvm.va_end.p0(ptr %va)
+ ret double %0
+}
+
+; CHECK-LABEL: define double @variadic_int_double_get_secondz(...) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va_list = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; CHECK-NEXT: %0 = tail call double @variadic_int_double_get_secondz.valist(ptr %va_list)
+; CHECK-NEXT: ret double %0
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal double @variadic_int_double_get_secondz.valist(ptr noalias %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: store ptr %varargs, ptr %va, align 4
+; CHECK-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; CHECK-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; CHECK-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; CHECK-NEXT: store ptr %argp.next2, ptr %va, align 4
+; CHECK-NEXT: %0 = load double, ptr %argp.next, align 4
+; CHECK-NEXT: ret double %0
+; CHECK-NEXT: }
+
+
+; CHECK-LABEL: @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %cmp.i = icmp eq i32 %call, %x
+; CHECK-NEXT: ret i1 %cmp.i
+; CHECK-NEXT: }
+
+define zeroext i1 @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = icmp eq i32 %call, %x
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call i32 @variadic_int_double_get_firstz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = icmp eq i32 %call, %x
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+ %call = call i32 (...) @variadic_int_double_get_firstz(i32 %x, double %y)
+ %cmp.i = icmp eq i32 %call, %x
+ ret i1 %cmp.i
+}
+
+; CHECK-LABEL: @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %cmp.i = fcmp oeq double %call, %y
+; CHECK-NEXT: ret i1 %cmp.i
+; CHECK-NEXT: }
+
+define zeroext i1 @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = fcmp oeq double %call, %y
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call double @variadic_int_double_get_secondz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = fcmp oeq double %call, %y
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+ %call = call double (...) @variadic_int_double_get_secondz(i32 %x, double %y)
+ %cmp.i = fcmp oeq double %call, %y
+ ret i1 %cmp.i
+}
+
+; Declaration unchanged
+; CHECK: declare void @variadic_without_callers(...)
+declare void @variadic_without_callers(...)
+
+declare void @llvm.va_start.p0(ptr)
+declare void @llvm.va_end.p0(ptr)
diff --git a/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
new file mode 100644
index 0000000..de04c72
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+declare void @vararg(...)
+@vararg_ptr = hidden global ptr @vararg, align 4
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; OPT-LABEL: @fptr_single_i32(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; OPT-NEXT: tail call void (...) [[TMP0]](i32 noundef [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @fptr_single_i32(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_SINGLE_I32_VARARG:%.*]], align 16
+; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_SINGLE_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; OPT-LABEL: @fptr_libcS(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; OPT-NEXT: tail call void (...) [[TMP0]](ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @fptr_libcS(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_LIBCS_VARARG:%.*]], align 16
+; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(ptr noundef nonnull byval(%struct.libcS) align 8 %x)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/intrinsics.ll b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll
new file mode 100644
index 0000000..1782c92
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=CHECK,OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=CHECK,ABI
+; REQUIRES: webassembly-registered-target
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+
+define void @start_once(...) {
+; OPT-LABEL: @start_once(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[VA_START:%.*]] = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_START]])
+; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_START]])
+; OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VA_START]], align 4
+; OPT-NEXT: call void @start_once.valist(ptr [[TMP0]])
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_START]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @start_once(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[S:%.*]] = alloca ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S]])
+; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S]], align 4
+; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP0]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S]])
+; ABI-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ %0 = load ptr, ptr %s, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+ ret void
+}
+
+
+define void @start_twice(...) {
+; OPT-LABEL: @start_twice(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[VA_START:%.*]] = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_START]])
+; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_START]])
+; OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VA_START]], align 4
+; OPT-NEXT: call void @start_twice.valist(ptr [[TMP0]])
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_START]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @start_twice(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[S0:%.*]] = alloca ptr, align 4
+; ABI-NEXT: [[S1:%.*]] = alloca ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S0]])
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S1]])
+; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S0]], align 4
+; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S0]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP0]])
+; ABI-NEXT: store ptr [[VARARGS]], ptr [[S1]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S1]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S0]])
+; ABI-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 4
+ %s1 = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ %0 = load ptr, ptr %s0, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ %1 = load ptr, ptr %s1, align 4
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+ ret void
+}
+
+define void @copy(ptr noundef %va) {
+; CHECK-LABEL: @copy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VA_ADDR:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: [[CP:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: store ptr [[VA:%.*]], ptr [[VA_ADDR]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[CP]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[CP]], ptr [[VA_ADDR]], i32 4, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CP]], align 4
+; CHECK-NEXT: call void @valist(ptr noundef [[TMP0]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[CP]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %va, ptr %va.addr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr)
+ %0 = load ptr, ptr %cp, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/invoke.ll b/llvm/test/Transforms/ExpandVariadics/invoke.ll
new file mode 100644
index 0000000..ced2edf
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/invoke.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: not --crash opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s 2>&1 | FileCheck %s -check-prefixes=ERROR
+; REQUIRES: webassembly-registered-target
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+
+; ERROR: LLVM ERROR: Cannot lower callbase instruction
+
+@_ZTIi = external constant ptr
+
+; Function Attrs: mustprogress
+define hidden void @test0(i32 noundef %x) #0 personality ptr @__gxx_wasm_personality_v0 {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: invoke void (...) @may_throw(i32 noundef [[X:%.*]])
+; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
+; CHECK: catch.dispatch:
+; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.start] unwind to caller
+; CHECK: catch.start:
+; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr @_ZTIi]
+; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @llvm.wasm.get.exception(token [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.wasm.get.ehselector(token [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.eh.typeid.for.p0(ptr nonnull @_ZTIi)
+; CHECK-NEXT: [[MATCHES:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: br i1 [[MATCHES]], label [[CATCH:%.*]], label [[RETHROW:%.*]]
+; CHECK: catch:
+; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__cxa_begin_catch(ptr [[TMP2]]) [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: call void (...) @dont_throw(i32 noundef [[X]]) [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: call void @__cxa_end_catch() [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: catchret from [[TMP1]] to label [[TRY_CONT]]
+; CHECK: rethrow:
+; CHECK-NEXT: call void @llvm.wasm.rethrow() [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: unreachable
+; CHECK: try.cont:
+; CHECK-NEXT: ret void
+;
+entry:
+ invoke void (...) @may_throw(i32 noundef %x)
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch.start] unwind to caller
+
+catch.start: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [ptr @_ZTIi]
+ %2 = tail call ptr @llvm.wasm.get.exception(token %1)
+ %3 = tail call i32 @llvm.wasm.get.ehselector(token %1)
+ %4 = tail call i32 @llvm.eh.typeid.for.p0(ptr nonnull @_ZTIi)
+ %matches = icmp eq i32 %3, %4
+ br i1 %matches, label %catch, label %rethrow
+
+catch: ; preds = %catch.start
+ %5 = call ptr @__cxa_begin_catch(ptr %2) #6 [ "funclet"(token %1) ]
+ call void (...) @dont_throw(i32 noundef %x) #6 [ "funclet"(token %1) ]
+ call void @__cxa_end_catch() #6 [ "funclet"(token %1) ]
+ catchret from %1 to label %try.cont
+
+rethrow: ; preds = %catch.start
+ call void @llvm.wasm.rethrow() #5 [ "funclet"(token %1) ]
+ unreachable
+
+try.cont: ; preds = %entry, %catch
+ ret void
+}
+
+declare void @may_throw(...)
+
+declare i32 @__gxx_wasm_personality_v0(...)
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
+declare ptr @llvm.wasm.get.exception(token)
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
+declare i32 @llvm.wasm.get.ehselector(token)
+
+; Function Attrs: nofree nosync nounwind memory(none)
+declare i32 @llvm.eh.typeid.for.p0(ptr)
+
+declare ptr @__cxa_begin_catch(ptr)
+
+; Function Attrs: nounwind
+declare void @dont_throw(...)
+
+declare void @__cxa_end_catch()
+
+; Function Attrs: noreturn
+declare void @llvm.wasm.rethrow()
+
+
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll b/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll
new file mode 100644
index 0000000..85fefda
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; CHECK: @sink
+declare void @sink(...)
+
+
+define void @pass_byval(ptr byval(i32) %b) {
+; OPT-LABEL: @pass_byval(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byval(i32) [[B:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_byval(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_BYVAL_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP0]], ptr [[B:%.*]], i64 4, i1 false)
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byval(i32) %b)
+ ret void
+}
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define void @i32_libcS_byval(i32 %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; OPT-LABEL: @i32_libcS_byval(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @i32_libcS_byval(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_BYVAL_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[Y:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x, ptr byval(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define void @libcS_i32_byval(ptr byval(%struct.libcS) align 8 %x, i32 %y) {
+; OPT-LABEL: @libcS_i32_byval(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @libcS_i32_byval(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_BYVAL_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byval(%struct.libcS) align 8 %x, i32 %y)
+ ret void
+}
+
+
+define void @pass_byref(ptr byref(i32) %b) {
+; OPT-LABEL: @pass_byref(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byref(i32) [[B:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_byref(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_BYREF_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[B:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byref(i32) %b)
+ ret void
+}
+
+define void @i32_libcS_byref(i32 %x, ptr noundef byref(%struct.libcS) align 8 %y) {
+; OPT-LABEL: @i32_libcS_byref(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], ptr byref([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @i32_libcS_byref(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_BYREF_VARARG:%.*]], align 16
+; ABI-NEXT: store ptr [[Y:%.*]], ptr [[INDIRECTALLOCA]], align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x, ptr byref(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define void @libcS_i32_byref(ptr byref(%struct.libcS) align 8 %x, i32 %y) {
+; OPT-LABEL: @libcS_i32_byref(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byref([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @libcS_i32_byref(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_BYREF_VARARG:%.*]], align 16
+; ABI-NEXT: store ptr [[X:%.*]], ptr [[INDIRECTALLOCA]], align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byref(%struct.libcS) align 8 %x, i32 %y)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll b/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll
new file mode 100644
index 0000000..8dcbb86
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; CHECK: @sink
+declare void @sink(...)
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define void @i32_libcS(i32 %x, %struct.libcS %y) {
+; OPT-LABEL: @i32_libcS(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], [[STRUCT_LIBCS:%.*]] [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @i32_libcS(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_VARARG:%.*]], align 16
+; ABI-NEXT: store [[STRUCT_LIBCS]] [[Y:%.*]], ptr [[INDIRECTALLOCA]], align 8
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x, %struct.libcS %y)
+ ret void
+}
+
+define void @libcS_i32(%struct.libcS %x, i32 %y) {
+; OPT-LABEL: @libcS_i32(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink([[STRUCT_LIBCS:%.*]] [[X:%.*]], i32 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @libcS_i32(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_VARARG:%.*]], align 16
+; ABI-NEXT: store [[STRUCT_LIBCS]] [[X:%.*]], ptr [[INDIRECTALLOCA]], align 8
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(%struct.libcS %x, i32 %y)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-integers.ll b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll
new file mode 100644
index 0000000..a1cb681
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll
@@ -0,0 +1,345 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; Wasm passes struct {char} as an i8 so can check the varargs passing works on integers smaller than the slot size
+
+declare void @sink(...)
+
+
+define void @pass_nothing() {
+; OPT-LABEL: @pass_nothing(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink()
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_nothing(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_NOTHING_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink()
+ ret void
+}
+
+define void @pass_s1(i8 %x) {
+; OPT-LABEL: @pass_s1(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s1(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S1_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP0]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i8 %x)
+ ret void
+}
+
+define void @pass_s2(i16 %x) {
+; OPT-LABEL: @pass_s2(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i16 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s2(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S2_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP0]], align 2
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i16 %x)
+ ret void
+}
+
+define void @pass_s3(i32 %x) {
+; OPT-LABEL: @pass_s3(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s3(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S3_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x)
+ ret void
+}
+
+define void @pass_s4(i64 %x) {
+; OPT-LABEL: @pass_s4(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i64 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s4(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S4_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP0]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i64 %x)
+ ret void
+}
+
+define void @pass_s5(<4 x i32> %x) {
+; OPT-LABEL: @pass_s5(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s5(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S5_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP0]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(<4 x i32> %x)
+ ret void
+}
+
+define void @pass_int_s1(i32 %i, i8 %x) {
+; OPT-LABEL: @pass_int_s1(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s1(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S1_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 5, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP1]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 5, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i8 %x)
+ ret void
+}
+
+define void @pass_int_s2(i32 %i, i16 %x) {
+; OPT-LABEL: @pass_int_s2(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s2(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S2_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 6, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 6, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i16 %x)
+ ret void
+}
+
+define void @pass_int_s3(i32 %i, i32 %x) {
+; OPT-LABEL: @pass_int_s3(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i32 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s3(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S3_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i32 %x)
+ ret void
+}
+
+define void @pass_int_s4(i32 %i, i64 %x) {
+; OPT-LABEL: @pass_int_s4(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i64 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s4(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S4_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP1]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i64 %x)
+ ret void
+}
+
+define void @pass_int_s5(i32 %i, <4 x i32> %x) {
+; OPT-LABEL: @pass_int_s5(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], <4 x i32> [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s5(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S5_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP1]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, <4 x i32> %x)
+ ret void
+}
+
+define void @pass_asc(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) {
+; OPT-LABEL: @pass_asc(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i8 [[X1:%.*]], i16 [[X2:%.*]], i32 [[X3:%.*]], i64 [[X4:%.*]], <4 x i32> [[X5:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_asc(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_ASC_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 48, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP0]], align 1
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 4
+; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP2]], align 4
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 6
+; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP3]], align 8
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 8
+; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP4]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 48, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5)
+ ret void
+}
+
+define void @pass_dsc(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4) {
+; OPT-LABEL: @pass_dsc(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X0:%.*]], i64 [[X1:%.*]], i32 [[X2:%.*]], i16 [[X3:%.*]], i8 [[X4:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_dsc(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_DSC_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 33, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store <4 x i32> [[X0:%.*]], ptr [[TMP0]], align 16
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i64 [[X1:%.*]], ptr [[TMP1]], align 8
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i32 [[X2:%.*]], ptr [[TMP2]], align 4
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; ABI-NEXT: store i16 [[X3:%.*]], ptr [[TMP3]], align 2
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 5
+; ABI-NEXT: store i8 [[X4:%.*]], ptr [[TMP4]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 33, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4)
+ ret void
+}
+
+define void @pass_multiple(i32 %i, i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) {
+; OPT-LABEL: @pass_multiple(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X2:%.*]], i64 [[X4:%.*]])
+; OPT-NEXT: tail call void (...) @sink(i32 [[I]], i8 [[X1:%.*]], i32 [[X3:%.*]], <4 x i32> [[X5:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_multiple(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_MULTIPLE_VARARG:%.*]], align 16
+; ABI-NEXT: [[VARARG_BUFFER1:%.*]] = alloca [[PASS_MULTIPLE_VARARG_0:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP2]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I]], ptr [[TMP3]], align 4
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 1
+; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP4]], align 1
+; ABI-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 3
+; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP5]], align 4
+; ABI-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 5
+; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP6]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i16 %x2, i64 %x4)
+ tail call void (...) @sink(i32 %i, i8 %x1, i32 %x3, <4 x i32> %x5)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
index ae503bf..e103fe9 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
@@ -98,8 +98,7 @@ declare i8 @gen8()
define i1 @c0() {
; CHECK-LABEL: @c0(
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
-; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[X]], -1
; CHECK-NEXT: ret i1 [[RET]]
;
%x = call i8 @gen8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
index d1dd411..bbd733e 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
@@ -125,8 +125,7 @@ define i1 @oneuse0() {
define i1 @c0(i8 %x) {
; CHECK-LABEL: @c0(
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
-; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[TMP0]], [[X]]
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%tmp0 = and i8 %x, 3
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
index 4bed21a..b167c8a 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
@@ -113,8 +113,7 @@ define i1 @oneuse0() {
define i1 @c0(i8 %x) {
; CHECK-LABEL: @c0(
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
-; CHECK-NEXT: [[RET:%.*]] = icmp sle i8 [[TMP0]], [[X]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[X:%.*]], -1
; CHECK-NEXT: ret i1 [[RET]]
;
%tmp0 = and i8 %x, 3
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
index 8415204..8281502 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
@@ -108,8 +108,7 @@ declare i8 @gen8()
define i1 @c0() {
; CHECK-LABEL: @c0(
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
-; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%x = call i8 @gen8()
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index 8bb7fd0..0aace5f 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -7,8 +7,8 @@ define i1 @src_is_mask_zext(i16 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_zext(
; CHECK-NEXT: [[M_IN:%.*]] = lshr i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = zext i8 [[M_IN]] to i16
-; CHECK-NEXT: [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i16 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i16 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i16 %x_in, 123
@@ -83,8 +83,8 @@ define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) {
; CHECK-NEXT: [[MY:%.*]] = lshr i8 7, [[Y:%.*]]
; CHECK-NEXT: [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], [[MZ]]
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -121,8 +121,8 @@ define i1 @src_is_mask_or(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_or(
; CHECK-NEXT: [[MY:%.*]] = lshr i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], 7
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -138,8 +138,8 @@ define i1 @src_is_mask_xor(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_xor(
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[MASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -173,8 +173,8 @@ define i1 @src_is_mask_select(i8 %x_in, i8 %y, i1 %cond) {
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -249,8 +249,8 @@ define i1 @src_is_mask_lshr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
; CHECK-NEXT: [[MASK:%.*]] = lshr i8 [[SMASK]], [[Z:%.*]]
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -269,8 +269,8 @@ define i1 @src_is_mask_ashr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
; CHECK-NEXT: [[MASK:%.*]] = ashr i8 [[SMASK]], [[Z:%.*]]
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -287,8 +287,8 @@ define i1 @src_is_mask_p2_m1(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_p2_m1(
; CHECK-NEXT: [[P2ORZ:%.*]] = shl i8 2, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = add i8 [[P2ORZ]], -1
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -304,8 +304,8 @@ define i1 @src_is_mask_umax(i8 %x_in, i8 %y) {
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umax.i8(i8 [[YMASK]], i8 3)
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -324,8 +324,8 @@ define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) {
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[ZMASK:%.*]] = lshr i8 15, [[Z:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 [[ZMASK]])
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -364,8 +364,8 @@ define i1 @src_is_mask_smax(i8 %x_in, i8 %y) {
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.smax.i8(i8 [[YMASK]], i8 -1)
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -383,8 +383,8 @@ define i1 @src_is_mask_smin(i8 %x_in, i8 %y) {
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.smin.i8(i8 [[YMASK]], i8 0)
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -401,8 +401,8 @@ define i1 @src_is_mask_bitreverse_not_mask(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_bitreverse_not_mask(
; CHECK-NEXT: [[NMASK:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -455,9 +455,9 @@ define i1 @src_is_notmask_shl(i8 %x_in, i8 %y, i1 %cond) {
define i1 @src_is_notmask_x_xor_neg_x(i8 %x_in, i8 %y, i1 %cond) {
; CHECK-LABEL: @src_is_notmask_x_xor_neg_x(
; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[NEG_Y:%.*]] = add i8 [[Y:%.*]], -1
-; CHECK-NEXT: [[NOTMASK0:%.*]] = xor i8 [[NEG_Y]], [[Y]]
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 7
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[TMP2]], i8 7
; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[TMP3]]
; CHECK-NEXT: ret i1 [[R]]
;
@@ -473,9 +473,9 @@ define i1 @src_is_notmask_x_xor_neg_x(i8 %x_in, i8 %y, i1 %cond) {
define i1 @src_is_notmask_x_xor_neg_x_inv(i8 %x_in, i8 %y, i1 %cond) {
; CHECK-LABEL: @src_is_notmask_x_xor_neg_x_inv(
; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[NEG_Y:%.*]] = add i8 [[Y:%.*]], -1
-; CHECK-NEXT: [[NOTMASK0:%.*]] = xor i8 [[NEG_Y]], [[Y]]
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 7
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[TMP2]], i8 7
; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[TMP3]]
; CHECK-NEXT: ret i1 [[R]]
;
@@ -625,9 +625,7 @@ define i1 @src_is_notmask_xor_fail(i8 %x_in, i8 %y) {
define i1 @src_is_mask_const_slt(i8 %x_in) {
; CHECK-LABEL: @src_is_mask_const_slt(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], 7
-; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[X]], [[AND]]
+; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[X_IN:%.*]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -650,9 +648,7 @@ define i1 @src_is_mask_const_sgt(i8 %x_in) {
define i1 @src_is_mask_const_sle(i8 %x_in) {
; CHECK-LABEL: @src_is_mask_const_sle(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], 31
-; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[AND]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[X_IN:%.*]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
index 0f26be1..75badab 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
@@ -58,7 +58,7 @@ define i1 @icmp_sge_x_negy(i8 %x, i8 %y) {
; CHECK-NEXT: [[CY:%.*]] = icmp slt i8 [[Y:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CY]])
; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sge i8 [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp eq i8 [[AND]], [[X]]
; CHECK-NEXT: ret i1 [[Z]]
;
%cy = icmp slt i8 %y, 0
@@ -74,7 +74,7 @@ define i1 @icmp_slt_x_negy(i8 %x, i8 %y) {
; CHECK-NEXT: br i1 [[CY]], label [[NEGY:%.*]], label [[POSY:%.*]]
; CHECK: negy:
; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT: [[Z:%.*]] = icmp slt i8 [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp ne i8 [[AND]], [[X]]
; CHECK-NEXT: ret i1 [[Z]]
; CHECK: posy:
; CHECK-NEXT: [[R:%.*]] = call i1 @barrier()
@@ -116,10 +116,7 @@ posy:
define i1 @icmp_sle_x_negy(i8 %x, i8 %yy) {
; CHECK-LABEL: @icmp_sle_x_negy(
-; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], -128
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[Y]], [[X:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sle i8 [[AND]], [[X]]
-; CHECK-NEXT: ret i1 [[Z]]
+; CHECK-NEXT: ret i1 true
;
%y = or i8 %yy, 128
%and = and i8 %y, %x
@@ -129,10 +126,7 @@ define i1 @icmp_sle_x_negy(i8 %x, i8 %yy) {
define <2 x i1> @icmp_sgt_x_negy(<2 x i8> %x, <2 x i8> %yy) {
; CHECK-LABEL: @icmp_sgt_x_negy(
-; CHECK-NEXT: [[Y:%.*]] = or <2 x i8> [[YY:%.*]], <i8 -128, i8 -128>
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[Y]], [[X:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[AND]], [[X]]
-; CHECK-NEXT: ret <2 x i1> [[Z]]
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%y = or <2 x i8> %yy, <i8 128, i8 128>
%and = and <2 x i8> %y, %x
@@ -155,9 +149,7 @@ define <2 x i1> @icmp_sgt_x_negy_fail_partial(<2 x i8> %x, <2 x i8> %yy) {
define <2 x i1> @icmp_sle_x_posy(<2 x i8> %x, <2 x i8> %yy) {
; CHECK-LABEL: @icmp_sle_x_posy(
-; CHECK-NEXT: [[Y:%.*]] = and <2 x i8> [[YY:%.*]], <i8 127, i8 127>
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[Y]], [[X:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sle <2 x i8> [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[Z]]
;
%y = and <2 x i8> %yy, <i8 127, i8 127>
@@ -183,8 +175,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) {
; CHECK-LABEL: @icmp_sgt_x_posy(
; CHECK-NEXT: [[CY:%.*]] = icmp sgt i8 [[Y:%.*]], -1
; CHECK-NEXT: call void @llvm.assume(i1 [[CY]])
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sgt i8 [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[Z]]
;
%cy = icmp sge i8 %y, 0
@@ -196,9 +187,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) {
define <2 x i1> @icmp_sgt_negx_y(<2 x i8> %xx, <2 x i8> %y) {
; CHECK-LABEL: @icmp_sgt_negx_y(
-; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 -128, i8 -128>
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[Z]]
;
%x = or <2 x i8> %xx, <i8 128, i8 128>
@@ -211,8 +200,7 @@ define i1 @icmp_sle_negx_y(i8 %x, i8 %y) {
; CHECK-LABEL: @icmp_sle_negx_y(
; CHECK-NEXT: [[CX:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CX]])
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sle i8 [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i8 [[Y:%.*]], 0
; CHECK-NEXT: ret i1 [[Z]]
;
%cx = icmp slt i8 %x, 0
@@ -239,9 +227,9 @@ define i1 @icmp_sle_negx_y_fail_maybe_zero(i8 %x, i8 %y) {
define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) {
; CHECK-LABEL: @icmp_eq_x_invertable_y_todo(
-; CHECK-NEXT: [[YY:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%yy = select i1 %y, i8 7, i8 24
@@ -252,8 +240,8 @@ define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) {
define i1 @icmp_eq_x_invertable_y(i8 %x, i8 %y) {
; CHECK-LABEL: @icmp_eq_x_invertable_y(
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%yy = xor i8 %y, -1
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 26f53cb..3048746 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -373,3 +373,29 @@ define i1 @pr64610(ptr %b) {
%r = icmp ugt i32 %or, %s
ret i1 %r
}
+
+define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y, i8 %z) {
+; CHECK-LABEL: @icmp_eq_x_invertable_y2_todo(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 [[Z:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %zz = xor i8 %z, -1
+ %yy = select i1 %y, i8 7, i8 %zz
+ %or = or i8 %x, %yy
+ %r = icmp eq i8 %yy, %or
+ ret i1 %r
+}
+
+define i1 @icmp_eq_x_invertable_y2(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_x_invertable_y2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %yy = xor i8 %y, -1
+ %or = or i8 %x, %yy
+ %r = icmp eq i8 %yy, %or
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index d3cf2af..a0ee438 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -2787,11 +2787,23 @@ define <2 x i8> @select_replacement_add_eq_vec_nonuniform(<2 x i8> %x, <2 x i8>
define <2 x i8> @select_replacement_add_eq_vec_poison(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @select_replacement_add_eq_vec_poison(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 poison>
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 poison>, <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SEL]]
+;
+ %cmp = icmp eq <2 x i8> %x, <i8 1, i8 poison>
+ %add = add <2 x i8> %x, <i8 1, i8 1>
+ %sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %y
+ ret <2 x i8> %sel
+}
+
+define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @select_replacement_add_eq_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 undef>
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], <i8 1, i8 1>
; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> [[Y:%.*]]
; CHECK-NEXT: ret <2 x i8> [[SEL]]
;
- %cmp = icmp eq <2 x i8> %x, <i8 1, i8 poison>
+ %cmp = icmp eq <2 x i8> %x, <i8 1, i8 undef>
%add = add <2 x i8> %x, <i8 1, i8 1>
%sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %y
ret <2 x i8> %sel
@@ -2835,6 +2847,20 @@ define i8 @select_replacement_sub_noundef(i8 %x, i8 noundef %y, i8 %z) {
ret i8 %sel
}
+define i8 @select_replacement_sub_noundef_but_may_be_poison(i8 %x, i8 noundef %yy, i8 %z) {
+; CHECK-LABEL: @select_replacement_sub_noundef_but_may_be_poison(
+; CHECK-NEXT: [[Y:%.*]] = shl nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]]
+; CHECK-NEXT: ret i8 [[SEL]]
+;
+ %y = shl nuw i8 %yy, 1
+ %cmp = icmp eq i8 %x, %y
+ %sub = sub i8 %x, %y
+ %sel = select i1 %cmp, i8 %sub, i8 %z
+ ret i8 %sel
+}
+
; TODO: The transform is also safe without noundef.
define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @select_replacement_sub(
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
index 323e242..64e8a6b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
@@ -2,7 +2,7 @@
declare void @foo(i64)
-; Verify that redundant adds aren't inserted by LSR.
+; Verify that redundant adds or geps aren't inserted by LSR.
; CHECK-LABEL: @bar(
define void @bar(ptr %A) {
entry:
@@ -10,9 +10,11 @@ entry:
while.cond:
; CHECK-LABEL: while.cond:
-; CHECK: add i64 %lsr.iv, 1
; CHECK-NOT: add i64 %lsr.iv, 1
; CHECK-LABEL: land.rhs:
+; CHECK: getelementptr i8, ptr %lsr.iv, i64 -8
+; CHECK-NOT: getelementptr i8, ptr %lsr.iv, i64 -8
+; CHECK-NOT: add i64, %lsr.iv, 1
%indvars.iv28 = phi i64 [ %indvars.iv.next29, %land.rhs ], [ 50, %entry ]
%cmp = icmp sgt i64 %indvars.iv28, 0
br i1 %cmp, label %land.rhs, label %while.end
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll
new file mode 100644
index 0000000..4914bb7
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+target triple = "riscv64"
+
+; This test was added as example motivation for the changes in #89927, which
+; cause LSR to drop solutions if they are deemed to be less profitable than
+; the starting point. At the time of adding this test, the best solution
+; identified by LSR's search heuristics was an unprofitable one. This could
+; of course change with future LSR improvements.
+
+%struct = type { i64, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, i64, i64, i32, i64 }
+
+define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT: [[CALL:%.*]] = tail call ptr null(i64 0)
+; CHECK-NEXT: br label %[[BB2:.*]]
+; CHECK: [[BB1:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT: ret i32 0
+; CHECK: [[BB2]]:
+; CHECK-NEXT: [[LSR_IV30:%.*]] = phi i64 [ [[LSR_IV_NEXT31:%.*]], %[[BB2]] ], [ 8, [[BB:%.*]] ]
+; CHECK-NEXT: [[LSR_IV27:%.*]] = phi i64 [ [[LSR_IV_NEXT28:%.*]], %[[BB2]] ], [ 12, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV24:%.*]] = phi i64 [ [[LSR_IV_NEXT25:%.*]], %[[BB2]] ], [ 16, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV21:%.*]] = phi i64 [ [[LSR_IV_NEXT22:%.*]], %[[BB2]] ], [ 20, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV18:%.*]] = phi i64 [ [[LSR_IV_NEXT19:%.*]], %[[BB2]] ], [ 24, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV15:%.*]] = phi i64 [ [[LSR_IV_NEXT16:%.*]], %[[BB2]] ], [ 28, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV12:%.*]] = phi i64 [ [[LSR_IV_NEXT13:%.*]], %[[BB2]] ], [ 32, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV9:%.*]] = phi i64 [ [[LSR_IV_NEXT10:%.*]], %[[BB2]] ], [ 36, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], %[[BB2]] ], [ 40, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[BB2]] ], [ 48, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[BB2]] ], [ 72, [[BB]] ]
+; CHECK-NEXT: [[SCEVGEP32:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV30]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP32]], align 8
+; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV27]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP29]], align 4
+; CHECK-NEXT: [[SCEVGEP26:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV24]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP26]], align 8
+; CHECK-NEXT: [[SCEVGEP23:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV21]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP23]], align 4
+; CHECK-NEXT: [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV18]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP20]], align 8
+; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV15]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP17]], align 4
+; CHECK-NEXT: [[SCEVGEP14:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV12]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP14]], align 8
+; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV9]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP11]], align 4
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]]
+; CHECK-NEXT: store i64 0, ptr [[SCEVGEP6]], align 8
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV1]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP3]], align 8
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]]
+; CHECK-NEXT: store i32 0, ptr [[SCEVGEP]], align 8
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]]
+; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 40
+; CHECK-NEXT: store i64 0, ptr [[SCEVGEP8]], align 8
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT5]] = add i64 [[LSR_IV4]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT10]] = add i64 [[LSR_IV9]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT13]] = add i64 [[LSR_IV12]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT16]] = add i64 [[LSR_IV15]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT19]] = add i64 [[LSR_IV18]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT22]] = add i64 [[LSR_IV21]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT25]] = add i64 [[LSR_IV24]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT28]] = add i64 [[LSR_IV27]], 88
+; CHECK-NEXT: [[LSR_IV_NEXT31]] = add i64 [[LSR_IV30]], 88
+; CHECK-NEXT: br label %[[BB2]]
+;
+0:
+ %call = tail call ptr null(i64 0)
+ br label %2
+
+1:
+ %load = load i32, ptr %call, align 4
+ ret i32 0
+
+2:
+ %phi = phi i64 [ 0, %0 ], [ %add, %2 ]
+ %getelementptr = getelementptr %struct, ptr %call, i64 %phi
+ %getelementptr3 = getelementptr i8, ptr %getelementptr, i64 8
+ store i32 0, ptr %getelementptr3, align 8
+ %getelementptr4 = getelementptr i8, ptr %getelementptr, i64 12
+ store i32 0, ptr %getelementptr4, align 4
+ %getelementptr5 = getelementptr i8, ptr %getelementptr, i64 16
+ store i32 0, ptr %getelementptr5, align 8
+ %getelementptr6 = getelementptr i8, ptr %getelementptr, i64 20
+ store i32 0, ptr %getelementptr6, align 4
+ %getelementptr7 = getelementptr i8, ptr %getelementptr, i64 24
+ store i32 0, ptr %getelementptr7, align 8
+ %getelementptr8 = getelementptr i8, ptr %getelementptr, i64 28
+ store i32 0, ptr %getelementptr8, align 4
+ %getelementptr9 = getelementptr i8, ptr %getelementptr, i64 32
+ store i32 0, ptr %getelementptr9, align 8
+ %getelementptr10 = getelementptr i8, ptr %getelementptr, i64 36
+ store i32 0, ptr %getelementptr10, align 4
+ %getelementptr11 = getelementptr i8, ptr %getelementptr, i64 40
+ store i64 0, ptr %getelementptr11, align 8
+ %getelementptr12 = getelementptr i8, ptr %getelementptr, i64 48
+ store i32 0, ptr %getelementptr12, align 8
+ %getelementptr13 = getelementptr i8, ptr %getelementptr, i64 72
+ store i32 0, ptr %getelementptr13, align 8
+ %getelementptr14 = getelementptr i8, ptr %getelementptr, i64 80
+ store i64 0, ptr %getelementptr14, align 8
+ %add = add i64 %phi, 1
+ br label %2
+}
diff --git a/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
new file mode 100644
index 0000000..7fd4eb1
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
@@ -0,0 +1,562 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s
+
+declare void @f() convergent
+declare void @g()
+
+; Although this loop contains a convergent instruction, it should be
+; fully unrolled.
+define i32 @full_unroll() {
+; CHECK-LABEL: @full_unroll(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_1:%.*]]
+; CHECK: a.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_2:%.*]]
+; CHECK: a.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 3
+ br label %a
+
+a:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction, but it should be partially
+; unrolled. The unroll count is the largest power of 2 that divides the
+; multiple -- 4, in this case.
+define i32 @runtime_unroll(i32 %n) {
+; CHECK-LABEL: @runtime_unroll(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 12
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_3:%.*]], [[A_3:%.*]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_1:%.*]]
+; CHECK: a.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_2:%.*]]
+; CHECK: a.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_3]]
+; CHECK: a.3:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC_3]] = add nsw i32 [[X_0]], 4
+; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[LOOP_CTL]]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[EXIT:%.*]], label [[L3]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ %loop_ctl = mul nsw i32 %n, 12
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ br label %a
+
+a:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction, so its partial unroll
+; count must divide its trip multiple. This overrides its unroll
+; pragma -- we unroll exactly 8 times, even though 16 is requested.
+define i32 @pragma_unroll(i32 %n) {
+; CHECK-LABEL: @pragma_unroll(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 24
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_7:%.*]], [[A_7:%.*]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_1:%.*]]
+; CHECK: a.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_2:%.*]]
+; CHECK: a.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_3:%.*]]
+; CHECK: a.3:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_4:%.*]]
+; CHECK: a.4:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_5:%.*]]
+; CHECK: a.5:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_6:%.*]]
+; CHECK: a.6:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_7]]
+; CHECK: a.7:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC_7]] = add nsw i32 [[X_0]], 8
+; CHECK-NEXT: [[EXITCOND_7:%.*]] = icmp eq i32 [[INC_7]], [[LOOP_CTL]]
+; CHECK-NEXT: br i1 [[EXITCOND_7]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ %loop_ctl = mul nsw i32 %n, 24
+ br label %l3, !llvm.loop !0
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ br label %a
+
+a:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !0
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides the trip count 4, loop unrolling should respect the pragma.
+define void @pragma_unroll_divisible_trip_count() {
+; CHECK-LABEL: @pragma_unroll_divisible_trip_count(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[X_0]], 2
+; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 4
+; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret void
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides the trip multiple 2, loop unrolling should respect the pragma.
+define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
+; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 2
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC_1]] = add nsw i32 [[X_0]], 2
+; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], [[LOOP_CTL]]
+; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ %loop_ctl = mul nsw i32 %n, 2
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 is not known to divide the runtime trip count, the loop is not
+; unrolled, because a remainder is forbidden when unrolling a convergent loop.
+define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
+; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC]] = add nsw i32 [[X_0]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP4]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 does not divide the trip count 5, the loop is not unrolled by 2,
+; because a remainder is forbidden when unrolling a convergent loop. Instead,
+; the loop gets fully unrolled.
+define i32 @pragma_unroll_indivisible_trip_count() {
+; CHECK-LABEL: @pragma_unroll_indivisible_trip_count(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 5
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction that is anchored inside the loop
+; itself. It is unrolled by 2 with remainder, as requested by the loop metadata.
+define i32 @pragma_unroll_with_remainder(i32 %n) {
+; CHECK-LABEL: @pragma_unroll_with_remainder(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[N:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 1
+; CHECK-NEXT: br i1 [[TMP2]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[INC_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[TOK_LOOP_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP_1]]) ]
+; CHECK-NEXT: [[INC_1]] = add nsw i32 [[X_0]], 2
+; CHECK-NEXT: [[NITER_NEXT_1]] = add i32 [[NITER]], 2
+; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[L3]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: exit.unr-lcssa.loopexit:
+; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]]
+; CHECK: exit.unr-lcssa:
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[L3_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK: l3.epil.preheader:
+; CHECK-NEXT: br label [[L3_EPIL:%.*]]
+; CHECK: l3.epil:
+; CHECK-NEXT: [[TOK_LOOP_EPIL:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP_EPIL]]) ]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; Don't unroll a loop that is extended by convergence controls.
+;
+; We could theoretically duplicate the extension part, but this is not
+; implemented.
+define i32 @extended_loop(i32 %n) {
+; CHECK-LABEL: @extended_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[INC]] = add nsw i32 [[X_0]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP4]]
+; CHECK: exit:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ ret i32 0
+}
+
+; Inner loop is extended beyond the outer loop. No unrolling possible.
+
+define i32 @extended_inner_loop_1(i32 %n, i1 %cond) {
+; CHECK-LABEL: @extended_inner_loop_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[INC]] = add nsw i32 [[X_0]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 4
+; CHECK-NEXT: br label [[L2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2:
+; CHECK-NEXT: [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[L2]], label [[LATCH]], !llvm.loop [[LOOP4]]
+; CHECK: latch:
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]]
+; CHECK: exit:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br label %l2, !llvm.loop !1
+
+l2:
+ %tok.l2 = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ br i1 %cond, label %l2, label %latch, !llvm.loop !1
+
+latch:
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ ret i32 0
+}
+
+; Inner loop is extended inside the outer loop. Outer loop is unrolled.
+
+define i32 @extended_inner_loop_2(i32 %n, i1 %cond) {
+; CHECK-LABEL: @extended_inner_loop_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: br label [[L2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2:
+; CHECK-NEXT: [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[L2]], label [[LATCH:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: latch:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: br label [[L2_1:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1:
+; CHECK-NEXT: [[TOK_L2_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_1]], label [[LATCH_1:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: latch.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
+; CHECK-NEXT: br label [[L2_2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.2:
+; CHECK-NEXT: [[TOK_L2_2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_2]], label [[LATCH_2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: latch.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
+; CHECK-NEXT: br label [[L2_3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.3:
+; CHECK-NEXT: [[TOK_L2_3:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_3]], label [[LATCH_3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: latch.3:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br label %l2, !llvm.loop !1
+
+l2:
+ %tok.l2 = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ br i1 %cond, label %l2, label %latch, !llvm.loop !1
+
+latch:
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; No extension. Both loops unrolled.
+
+define i32 @unroll_nest(i32 %n, i1 %cond) {
+; CHECK-LABEL: @unroll_nest(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: br label [[L2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2:
+; CHECK-NEXT: [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[L2_1:%.*]], label [[LATCH:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1:
+; CHECK-NEXT: [[TOK_L2_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2]], label [[LATCH]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: br label [[L2_12:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.12:
+; CHECK-NEXT: [[TOK_L2_11:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_11]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_1_1:%.*]], label [[LATCH_1:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1.1:
+; CHECK-NEXT: [[TOK_L2_1_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1_1]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_12]], label [[LATCH_1]], !llvm.loop [[LOOP9]]
+; CHECK: latch.1:
+; CHECK-NEXT: br label [[L2_2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.2:
+; CHECK-NEXT: [[TOK_L2_2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_1_2:%.*]], label [[LATCH_2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1.2:
+; CHECK-NEXT: [[TOK_L2_1_2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1_2]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_2]], label [[LATCH_2]], !llvm.loop [[LOOP9]]
+; CHECK: latch.2:
+; CHECK-NEXT: br label [[L2_3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.3:
+; CHECK-NEXT: [[TOK_L2_3:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_1_3:%.*]], label [[LATCH_3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1.3:
+; CHECK-NEXT: [[TOK_L2_1_3:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1_3]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_3]], label [[LATCH_3]], !llvm.loop [[LOOP9]]
+; CHECK: latch.3:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br label %l2, !llvm.loop !1
+
+l2:
+ %tok.l2 = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ br i1 %cond, label %l2, label %latch, !llvm.loop !1
+
+latch:
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+declare token @llvm.experimental.convergence.anchor()
+declare token @llvm.experimental.convergence.loop()
+
+!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
+!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index 8d2820a..1627292 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -15,6 +15,11 @@ declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0
+declare double @tan(double) #0
+declare float @tanf(float) #0
+declare double @llvm.tan.f64(double) #0
+declare float @llvm.tan.f32(float) #0
+
declare double @pow(double, double) #0
declare float @powf(float, float) #0
declare double @llvm.pow.f64(double, double) #0
@@ -264,6 +269,114 @@ for.end:
ret void
}
+define void @tan_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+; CHECK-AVX512-VF8-LABEL: @tan_f64(
+; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
+; CHECK-AVX512-VF8: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @tan(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @tan_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32(
+; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+; CHECK-AVX512-VF16-LABEL: @tan_f32(
+; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
+; CHECK-AVX512-VF16: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @tanf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @tan_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic(
+; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
+; CHECK-AVX512-VF8: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.tan.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @tan_f32_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+; CHECK-AVX512-VF16-LABEL: @tan_f32_intrinsic(
+; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
+; CHECK-AVX512-VF16: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.tan.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
index 038852f..67a2cf2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
@@ -356,6 +356,117 @@ for.end: ; preds = %for.body
!132 = !{!"llvm.loop.vectorize.width", i32 8}
!133 = !{!"llvm.loop.vectorize.enable", i1 true}
+define void @tan_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64(
+; CHECK-LABEL: vector.body
+; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_tan(<2 x double> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @tan(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+ ret void
+}
+
+!141 = distinct !{!141, !142, !143}
+!142 = !{!"llvm.loop.vectorize.width", i32 2}
+!143 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+define void @tan_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32(
+; CHECK-LABEL: vector.body
+; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_tanf(<8 x float> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @tanf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
+
+for.end:
+ ret void
+}
+
+!151 = distinct !{!151, !152, !153}
+!152 = !{!"llvm.loop.vectorize.width", i32 8}
+!153 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @tan_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64_intrinsic(
+; CHECK-LABEL: vector.body
+; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_tan(<2 x double> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.tan.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
+
+for.end:
+ ret void
+}
+
+!161 = distinct !{!161, !162, !163}
+!162 = !{!"llvm.loop.vectorize.width", i32 2}
+!163 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @tan_f32_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32_intrinsic(
+; CHECK-LABEL: vector.body
+; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_tanf(<8 x float> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.tan.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
+
+for.end:
+ ret void
+}
+
+
+
+!171 = distinct !{!171, !172, !173}
+!172 = !{!"llvm.loop.vectorize.width", i32 8}
+!173 = !{!"llvm.loop.vectorize.enable", i1 true}
+
attributes #0 = { nounwind readnone }
declare double @sin(double) #0
@@ -366,6 +477,10 @@ declare double @cos(double) #0
declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0
+declare double @tan(double) #0
+declare float @tanf(float) #0
+declare double @llvm.tan.f64(double) #0
+declare float @llvm.tan.f32(float) #0
declare float @expf(float) #0
declare float @powf(float, float) #0
declare float @llvm.exp.f32(float) #0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
index 005557d..2e78e36 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -230,6 +230,52 @@ for.end:
ret void
}
+define void @tan_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_tan4(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.tan.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @tan_f32_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_tanf4(<4 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.tan.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
index 2e78a96..27038f3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
@@ -406,6 +406,31 @@ for.end: ; preds = %for.body, %entry
ret void
}
+;CHECK-LABEL: @tan_f32_intrinsic(
+;CHECK: vtanf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @llvm.tan.f32(float) nounwind readnone
+define void @tan_f32_intrinsic(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call float @llvm.tan.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+ store float %call, ptr %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
;CHECK-LABEL: @asin_f32(
;CHECK: vasinf{{.*}}<4 x float>
;CHECK: ret void
diff --git a/llvm/test/tools/llvm-cov/gcov/intermediate-format.test b/llvm/test/tools/llvm-cov/gcov/intermediate-format.test
index 583e670..a3f4695 100644
--- a/llvm/test/tools/llvm-cov/gcov/intermediate-format.test
+++ b/llvm/test/tools/llvm-cov/gcov/intermediate-format.test
@@ -1,5 +1,3 @@
-REQUIRES: shell
-
RUN: rm -rf %t && mkdir %t && cd %t
RUN: cp %S/Inputs/test.gcno %S/Inputs/test.gcda .
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
index bd7a489..7150a58 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
@@ -1455,20 +1455,20 @@ vzeroupper
# CHECK-NEXT: 3 2 1.00 * vpextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 2 3 1.00 vpextrw $1, %xmm0, %ecx
# CHECK-NEXT: 3 2 1.00 * vpextrw $1, %xmm0, (%rax)
-# CHECK-NEXT: 3 3 2.00 vphaddd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 3 3 2.00 vphaddsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphaddsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 3 3 2.00 vphaddw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphaddw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphaddd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphaddd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphaddsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphaddsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphaddw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vphminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * vphminposuw (%rax), %xmm2
-# CHECK-NEXT: 3 3 2.00 vphsubd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphsubd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 3 3 2.00 vphsubsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphsubsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 3 3 2.00 vphsubw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphsubw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphsubd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphsubd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphsubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphsubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphsubw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 2 2.00 vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 1.00 * vpinsrb $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 2 2.00 vpinsrd $1, %eax, %xmm1, %xmm2
@@ -1738,7 +1738,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - 126.00 322.92 237.92 160.50 160.50 19.00 291.92 6.25 19.00 19.00 19.00
+# CHECK-NEXT: - 126.00 325.58 252.58 160.50 160.50 19.00 274.58 6.25 19.00 19.00 19.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -1908,22 +1908,22 @@ vzeroupper
# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vextractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vextractps $1, %xmm0, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhaddpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhaddpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhaddpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhaddpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhaddps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhaddps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhsubpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhsubpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhsubpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhsubpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhsubps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhsubps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhsubps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - vinsertps $1, %xmm0, %xmm1, %xmm2
@@ -2167,20 +2167,20 @@ vzeroupper
# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpextrq $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vpextrw $1, %xmm0, %ecx
# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpextrw $1, %xmm0, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphaddd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphaddd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vphaddsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vphaddsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphaddw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphaddw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphaddd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphaddd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - vphaddsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - vphaddsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphaddw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - - - vphminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vphminposuw (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphsubd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphsubd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vphsubsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vphsubsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphsubw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphsubw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphsubd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphsubd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - vphsubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - vphsubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphsubw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - - - vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpinsrb $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - - - vpinsrd $1, %eax, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
index dcf8834..c251dc3 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
@@ -576,18 +576,18 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 5 20 2.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 5 18 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 5 20 2.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphaddsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphaddw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphaddw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphsubd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphsubd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphsubsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphsubsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphsubw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphsubw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphaddd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphaddd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphaddsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphaddsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphaddw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphaddw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphsubd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphsubd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphsubsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphsubsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphsubw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 12 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vpmaddwd %ymm0, %ymm1, %ymm2
@@ -778,7 +778,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - - 110.33 104.33 98.00 98.00 2.50 149.33 - 2.50 2.50 2.50
+# CHECK-NEXT: - - 110.33 116.33 98.00 98.00 2.50 137.33 - 2.50 2.50 2.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -898,18 +898,18 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphaddd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphaddd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vphaddsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vphaddsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphaddw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphaddw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphsubd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphsubd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vphsubsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vphsubsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphsubw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphsubw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphaddd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphaddd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - vphaddsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - vphaddsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphaddw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphaddw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphsubd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphsubd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - vphsubsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - vphsubsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphsubw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
index 4d19424..0d075a9 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
@@ -81,7 +81,7 @@ mwait
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - - 6.67 7.67 5.00 5.00 - 23.67 4.00 - - -
+# CHECK-NEXT: - - 8.00 9.00 5.00 5.00 - 21.00 4.00 - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -89,14 +89,14 @@ mwait
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - addsubpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - addsubps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - haddpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - haddpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - haddps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - haddps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - hsubpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - hsubpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - hsubps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - hsubps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - haddpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - haddpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - haddps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - haddps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - hsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - hsubpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - hsubps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - hsubps (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - lddqu (%rax), %xmm2
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - monitor
# CHECK-NEXT: - - - - - - - 1.00 - - - - movddup %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s
index 3a6668ce..d034cbd 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s
@@ -124,28 +124,28 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * palignr $1, (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phaddd %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phaddd (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phaddd %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phaddd (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phaddd %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phaddd (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phaddsw %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phaddsw (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phaddsw %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phaddsw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phaddsw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phaddw %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phaddw (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phaddw %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phaddw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phaddw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phaddw (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phsubd %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phsubd (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phsubd %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phsubd (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phsubd %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phsubd (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phsubsw %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phsubsw (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phsubsw %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phsubsw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phsubsw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phsubw %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phsubw (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phsubw %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phsubw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmaddubsw (%rax), %mm2
# CHECK-NEXT: 1 5 0.50 pmaddubsw %xmm0, %xmm2
@@ -187,7 +187,7 @@ psignw (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - - 30.67 13.67 16.00 16.00 - 67.67 - - - -
+# CHECK-NEXT: - - 30.67 25.67 16.00 16.00 - 55.67 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -209,28 +209,28 @@ psignw (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - palignr $1, (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - - - 2.50 - - - - phaddd %mm0, %mm2
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 2.50 - - - - phaddd (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - phaddd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - phaddd (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - phaddd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - phaddd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 2.00 - - - - phaddsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 2.00 - - - - phaddsw (%rax), %mm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - phaddsw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - phaddsw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - phaddsw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - phaddsw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - - - 2.50 - - - - phaddw %mm0, %mm2
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 2.50 - - - - phaddw (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - phaddw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - phaddw (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - phaddw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - phaddw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - - - 2.50 - - - - phsubd %mm0, %mm2
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 2.50 - - - - phsubd (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - phsubd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - phsubd (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - phsubd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - phsubd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 2.00 - - - - phsubsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 2.00 - - - - phsubsw (%rax), %mm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - phsubsw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - phsubsw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - phsubsw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - phsubsw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - - - 2.50 - - - - phsubw %mm0, %mm2
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 2.50 - - - - phsubw (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - phsubw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - phsubw (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - phsubw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - phsubw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - - - pmaddubsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - pmaddubsw (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - pmaddubsw %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
index cabb002..f4904e4 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 126.00 325.25 202.25 173.83 173.83 38.00 326.25 7.25 11.33
+# CHECK-NEXT: - 126.00 327.92 204.92 173.83 173.83 38.00 320.92 7.25 11.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1906,22 +1906,22 @@ vzeroupper
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vextractps $1, %xmm0, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhaddpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhaddpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhaddpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhaddpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhaddps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhaddps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhsubpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhsubpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhsubpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhsubpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhsubps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhsubps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhsubps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinsertf128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinsertps $1, %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
index e6bec19..0b6b035 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
@@ -79,7 +79,7 @@ mwait
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 6.67 6.67 5.00 5.00 - 24.67 4.00 -
+# CHECK-NEXT: - - 8.00 8.00 5.00 5.00 - 22.00 4.00 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -87,14 +87,14 @@ mwait
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - haddpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - haddps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - hsubpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - hsubpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - hsubps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - hsubps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - haddpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - haddpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - haddps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - haddps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - hsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - hsubpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - hsubps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - hsubps (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - lddqu (%rax), %xmm2
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - monitor
# CHECK-NEXT: - - - - - - - 1.00 - - movddup %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index f1262c5..f79f358 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -28,6 +28,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1152 -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1152 %t.o > %t-specify.txt
+; RUN: llvm-objdump -D %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-rc/windres-prefix.test b/llvm/test/tools/llvm-rc/windres-prefix.test
index 4c53fdf..7dda51d 100644
--- a/llvm/test/tools/llvm-rc/windres-prefix.test
+++ b/llvm/test/tools/llvm-rc/windres-prefix.test
@@ -1,5 +1,3 @@
-; REQUIRES: shell
-
; RUN: rm -rf %t && mkdir %t
; Check that a triple prefix on the executable gets picked up as target triple.
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index f9e90e2..50d437b 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -421,6 +421,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1151
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1151 -DFLAG_VALUE=0x4A
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152 -DFLAG_VALUE=0x55
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152 -DFLAG_VALUE=0x55
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152 -DFLAG_VALUE=0x55
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1200
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1200 -DFLAG_VALUE=0x48
diff --git a/llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll b/llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll
index 1ceeca8..127543c 100644
--- a/llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll
+++ b/llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll
@@ -2,7 +2,7 @@
; DICompileUnit and DISuprogram.
;
; RUN: llvm-reduce --delta-passes=di-metadata --abort-on-invalid-reduction --test FileCheck --test-arg --check-prefixes=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t
-; RUN: FileCheck <%t --enable-var-scope %s
+; RUN: FileCheck <%t --enable-var-scope %s --implicit-check-not=DIGlobalVariableExpression
; CHECK-INTERESTINGNESS: define void @test() !dbg [[SUBPROG:![0-9]+]]
; CHECK-INTERESTINGNESS: !llvm.module.flags = !{
@@ -21,12 +21,10 @@
; CHECK: !llvm.dbg.cu = !{[[CU:.+]]}
-; CHECK-DAG: [[CU]] = distinct !DICompileUnit(language: DW_LANG_C99,{{.*}}, retainedTypes: [[TYPES:![0-9]+]], globals: [[GLOBALS:![0-9]+]]
-; CHECK-DAG: [[EMPTY:![0-9]+]] = !{}
+; CHECK-DAG: [[CU]] = distinct !DICompileUnit(language: DW_LANG_C99,{{.*}}, retainedTypes: [[TYPES:![0-9]+]], globals: [[EMPTY:![0-9]+]]
+; CHECK-DAG: [[EMPTY]] = !{}
; CHECK-DAG: [[TYPES]] = !{[[T0:![0-9]+]]
; CHECK-DAG: [[T0]] = !DIBasicType(name: "unsigned int",
-; CHECK-DAG: [[GLOBALS]] = !{{{![0-9]+}}
-
; CHECK-DAG: [[SUBPROG]] = distinct !DISubprogram(name: "test", {{.*}}retainedNodes: [[EMPTY]])
define void @test() !dbg !17 {
diff --git a/llvm/test/tools/split-file/output-is-special.test b/llvm/test/tools/split-file/output-is-special.test
index 98bb4d3..0b1e0f7 100644
--- a/llvm/test/tools/split-file/output-is-special.test
+++ b/llvm/test/tools/split-file/output-is-special.test
@@ -1,5 +1,4 @@
# UNSUPPORTED: system-windows
-# REQUIRES: shell
## Don't delete the output if it is special, otherwise root may accidentally
## remove important special files.
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 40494da..c696934 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1631,6 +1631,7 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1152, "gfx1152"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp
index f4d8496..38352d634 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp
@@ -65,12 +65,13 @@ void identifyUninterestingMDNodes(Oracle &O, MDNodeList &MDs) {
SmallVector<Metadata *, 16> TN;
for (size_t I = 0; I < Tup->getNumOperands(); ++I) {
// Ignore any operands that are not DebugInfo metadata nodes.
- if (isa_and_nonnull<DINode>(Tup->getOperand(I)))
- // Don't add uninteresting operands to the tuple.
- if (!O.shouldKeep())
- continue;
-
- TN.push_back(Tup->getOperand(I));
+ if (Metadata *Op = Tup->getOperand(I).get()) {
+ if (isa<DINode>(Op) || isa<DIGlobalVariableExpression>(Op))
+ // Don't add uninteresting operands to the tuple.
+ if (!O.shouldKeep())
+ continue;
+ TN.push_back(Op);
+ }
}
if (TN.size() != Tup->getNumOperands())
DbgNode->replaceOperandWith(OpIdx, DbgNode->get(DbgNode->getContext(), TN));
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 7148e29..ca50187 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -63,6 +63,11 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
"riscv64"),
"e-m:e-p:64:64-i64:64-i128:128-n32:64-S128");
+ // Check that LoongArch64 upgrades -n64 to -n32:64.
+ EXPECT_EQ(UpgradeDataLayoutString("e-m:e-p:64:64-i64:64-i128:128-n64-S128",
+ "loongarch64"),
+ "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128");
+
// Check that SPIR && SPIRV targets add -G1 if it's not present.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir"), "e-p:32:32-G1");
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir64"), "e-p:32:32-G1");
diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index e9fd967..9e9b4fb 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -1138,6 +1138,11 @@ TEST_F(InMemoryFileSystemTest, DuplicatedFile) {
ASSERT_FALSE(FS.addFile("/a/b", 0, MemoryBuffer::getMemBuffer("a")));
ASSERT_TRUE(FS.addFile("/a", 0, MemoryBuffer::getMemBuffer("a")));
ASSERT_FALSE(FS.addFile("/a", 0, MemoryBuffer::getMemBuffer("b")));
+ ASSERT_TRUE(FS.addFile("/b/c/d", 0, MemoryBuffer::getMemBuffer("a")));
+ ASSERT_FALSE(FS.addFile("/b/c", 0, MemoryBuffer::getMemBuffer("a")));
+ ASSERT_TRUE(FS.addFile(
+ "/b/c", 0, MemoryBuffer::getMemBuffer(""), /*User=*/std::nullopt,
+ /*Group=*/std::nullopt, sys::fs::file_type::directory_file));
}
TEST_F(InMemoryFileSystemTest, DirectoryIteration) {
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 6aa1d7a..61921a9 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -125,6 +125,9 @@ TEST(getLinuxHostCPUName, AArch64) {
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0xc0\n"
"CPU part : 0xac5"),
"ampere1b");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
+ "CPU part : 0x001"),
+ "oryon-1");
// MSM8992/4 weirdness
StringRef MSM8992ProcCpuInfo = R"(
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 797d7df..571031d 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1815,11 +1815,23 @@ INSTANTIATE_TEST_SUITE_P(
{AArch64::AEK_CRC, AArch64::AEK_AES, AArch64::AEK_SHA2,
AArch64::AEK_FP, AArch64::AEK_SIMD, AArch64::AEK_FP16,
AArch64::AEK_RAS, AArch64::AEK_LSE, AArch64::AEK_RDM}),
- "8.2-A")),
+ "8.2-A"),
+ ARMCPUTestParams<AArch64::ExtensionBitset>(
+ "oryon-1", "armv8.6-a", "crypto-neon-fp-armv8",
+ (AArch64::ExtensionBitset(
+ {AArch64::AEK_CRC, AArch64::AEK_FP, AArch64::AEK_PAUTH,
+ AArch64::AEK_FCMA, AArch64::AEK_JSCVT, AArch64::AEK_SIMD,
+ AArch64::AEK_RAS, AArch64::AEK_LSE, AArch64::AEK_RDM,
+ AArch64::AEK_RCPC, AArch64::AEK_DOTPROD, AArch64::AEK_SM4,
+ AArch64::AEK_SHA3, AArch64::AEK_BF16, AArch64::AEK_SHA2,
+ AArch64::AEK_AES, AArch64::AEK_I8MM, AArch64::AEK_RAND,
+ AArch64::AEK_PROFILE, AArch64::AEK_CRYPTO})),
+ "8.6-A")),
+
ARMCPUTestParams<AArch64::ExtensionBitset>::PrintToStringParamName);
// Note: number of CPUs includes aliases.
-static constexpr unsigned NumAArch64CPUArchs = 76;
+static constexpr unsigned NumAArch64CPUArchs = 77;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
diff --git a/llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn
index 210dd12..e88df02 100644
--- a/llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn
@@ -29,6 +29,7 @@ static_library("Core") {
"DynoStats.cpp",
"Exceptions.cpp",
"FunctionLayout.cpp",
+ "GDBIndex.cpp",
"HashUtilities.cpp",
"JumpTable.cpp",
"MCPlusBuilder.cpp",
diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
index 0cf9925..d3a3ee75 100644
--- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
@@ -110,6 +110,7 @@ static_library("AST") {
"Interp/InterpShared.cpp",
"Interp/InterpStack.cpp",
"Interp/InterpState.cpp",
+ "Interp/MemberPointer.cpp",
"Interp/Pointer.cpp",
"Interp/PrimType.cpp",
"Interp/Program.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
index 0d134c7..bcf2ea7 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
@@ -33,6 +33,7 @@ static_library("IPO") {
"DeadArgumentElimination.cpp",
"ElimAvailExtern.cpp",
"EmbedBitcodePass.cpp",
+ "ExpandVariadics.cpp",
"ExtractGV.cpp",
"ForceFunctionAttrs.cpp",
"FunctionAttrs.cpp",
diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py
index 1d4babc..afb7f07 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -588,7 +588,10 @@ class LLVMConfig(object):
if getattr(self.config, pp, None)
]
- self.with_environment("LD_LIBRARY_PATH", lib_paths, append_path=True)
+ if platform.system() == "AIX":
+ self.with_environment("LIBPATH", lib_paths, append_path=True)
+ else:
+ self.with_environment("LD_LIBRARY_PATH", lib_paths, append_path=True)
shl = getattr(self.config, "llvm_shlib_dir", None)
pext = getattr(self.config, "llvm_plugin_ext", None)
diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h
index 32abacf..e3d69b7 100644
--- a/mlir/include/mlir-c/IR.h
+++ b/mlir/include/mlir-c/IR.h
@@ -858,6 +858,9 @@ MLIR_CAPI_EXPORTED MlirValue mlirBlockAddArgument(MlirBlock block,
MlirType type,
MlirLocation loc);
+/// Erase the argument at 'index' and remove it from the argument list.
+MLIR_CAPI_EXPORTED void mlirBlockEraseArgument(MlirBlock block, unsigned index);
+
/// Inserts an argument of the specified type at a specified index to the block.
/// Returns the newly added argument.
MLIR_CAPI_EXPORTED MlirValue mlirBlockInsertArgument(MlirBlock block,
diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
index 123ce36..852490c 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
@@ -33,36 +33,36 @@ class LLVMFuncOp;
/// external C function calls. The list of functions provided here must be
/// implemented separately (e.g. as part of a support runtime library or as part
/// of the libc).
-LLVM::LLVMFuncOp lookupOrCreatePrintI64Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintU64Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintF16Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintBF16Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintF32Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintF64Fn(ModuleOp moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintI64Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintU64Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintF16Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintBF16Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintF32Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintF64Fn(Operation *moduleOp);
/// Declares a function to print a C-string.
/// If a custom runtime function is defined via `runtimeFunctionName`, it must
/// have the signature void(char const*). The default function is `printString`.
LLVM::LLVMFuncOp
-lookupOrCreatePrintStringFn(ModuleOp moduleOp,
+lookupOrCreatePrintStringFn(Operation *moduleOp,
std::optional<StringRef> runtimeFunctionName = {});
-LLVM::LLVMFuncOp lookupOrCreatePrintOpenFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintCloseFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintCommaFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintNewlineFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreateMallocFn(ModuleOp moduleOp, Type indexType);
-LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp lookupOrCreatePrintOpenFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintCloseFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintCommaFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintNewlineFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreateMallocFn(Operation *moduleOp, Type indexType);
+LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(Operation *moduleOp,
Type indexType);
-LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreateGenericAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp lookupOrCreateFreeFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreateGenericAllocFn(Operation *moduleOp,
Type indexType);
-LLVM::LLVMFuncOp lookupOrCreateGenericAlignedAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp lookupOrCreateGenericAlignedAllocFn(Operation *moduleOp,
Type indexType);
-LLVM::LLVMFuncOp lookupOrCreateGenericFreeFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType,
+LLVM::LLVMFuncOp lookupOrCreateGenericFreeFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(Operation *moduleOp, Type indexType,
Type unrankedDescriptorType);
/// Create a FuncOp with signature `resultType`(`paramTypes`)` and name `name`.
-LLVM::LLVMFuncOp lookupOrCreateFn(ModuleOp moduleOp, StringRef name,
+LLVM::LLVMFuncOp lookupOrCreateFn(Operation *moduleOp, StringRef name,
ArrayRef<Type> paramTypes = {},
Type resultType = {}, bool isVarArg = false);
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
index fea5afa..81bab1b 100644
--- a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -114,12 +114,14 @@ def ApplyReassociativeReshapeFoldingPatternsOp : Op<Transform_Dialect,
def ApplyRewriteTensorOpsAsConstantPatternsOp : Op<Transform_Dialect,
"apply_patterns.tensor.rewrite_as_constant",
[DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+ let arguments = (ins UnitAttr:$aggressive);
let description = [{
Indicates that tensor ops (such as tensor.generate) should be replaced with
constants (arith.constant) when possible.
}];
- let assemblyFormat = "attr-dict";
+ let assemblyFormat =
+ "(`aggressive` $aggressive^)? attr-dict";
}
def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
index 7dabc26..7f983b8 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -91,9 +91,12 @@ void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns);
/// respectively.
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
+using ControlFoldFn = std::function<bool(OpOperand *)>;
+
/// Populates `patterns` with patterns that replace tensor ops (such as
/// tensor.generate) with constants when possible.
-void populateRewriteAsConstantPatterns(RewritePatternSet &patterns);
+void populateRewriteAsConstantPatterns(RewritePatternSet &patterns,
+ const ControlFoldFn &controlFn);
//===----------------------------------------------------------------------===//
// Transform helpers
diff --git a/mlir/include/mlir/Target/LLVMIR/Export.h b/mlir/include/mlir/Target/LLVMIR/Export.h
index 2244968..893aaaa 100644
--- a/mlir/include/mlir/Target/LLVMIR/Export.h
+++ b/mlir/include/mlir/Target/LLVMIR/Export.h
@@ -20,10 +20,11 @@ class Module;
namespace mlir {
class Operation;
-/// Translate operation that satisfies LLVM dialect module requirements into an
-/// LLVM IR module living in the given context. This translates operations from
-/// any dilalect that has a registered implementation of
-/// LLVMTranslationDialectInterface.
+/// Translates a given LLVM dialect `module` into an LLVM IR module living in
+/// the given context. Operates on any operation from dialects that provide a
+/// registered implementation of the LLVMTranslationDialectInterface. Returns
+/// nullptr when the translation fails.
+/// Verifies the produced LLVM module, except when `disableVerification` is set.
std::unique_ptr<llvm::Module>
translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext,
llvm::StringRef name = "LLVMDialectModule",
diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp
index de20632..4b6b54d 100644
--- a/mlir/lib/Bindings/Python/IRCore.cpp
+++ b/mlir/lib/Bindings/Python/IRCore.cpp
@@ -3238,6 +3238,19 @@ void mlir::python::populateIRCore(py::module &m) {
return PyBlockArgumentList(self.getParentOperation(), self.get());
},
"Returns a list of block arguments.")
+ .def(
+ "add_argument",
+ [](PyBlock &self, const PyType &type, const PyLocation &loc) {
+ return mlirBlockAddArgument(self.get(), type, loc);
+ },
+ "Append an argument of the specified type to the block and returns "
+ "the newly added argument.")
+ .def(
+ "erase_argument",
+ [](PyBlock &self, unsigned index) {
+ return mlirBlockEraseArgument(self.get(), index);
+ },
+ "Erase the argument at 'index' and remove it from the argument list.")
.def_property_readonly(
"operations",
[](PyBlock &self) {
diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp
index a72cd24..4e823c8 100644
--- a/mlir/lib/CAPI/IR/IR.cpp
+++ b/mlir/lib/CAPI/IR/IR.cpp
@@ -906,6 +906,10 @@ MlirValue mlirBlockAddArgument(MlirBlock block, MlirType type,
return wrap(unwrap(block)->addArgument(unwrap(type), unwrap(loc)));
}
+void mlirBlockEraseArgument(MlirBlock block, unsigned index) {
+ return unwrap(block)->eraseArgument(index);
+}
+
MlirValue mlirBlockInsertArgument(MlirBlock block, intptr_t pos, MlirType type,
MlirLocation loc) {
return wrap(unwrap(block)->insertArgument(pos, unwrap(type), unwrap(loc)));
diff --git a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
index b29abc9..e48ca51 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
@@ -10,18 +10,14 @@
#include "mlir/Analysis/DataLayoutAnalysis.h"
#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/SymbolTable.h"
using namespace mlir;
namespace {
-// TODO: Fix the LLVM utilities for looking up functions to take Operation*
-// with SymbolTable trait instead of ModuleOp and make similar change here. This
-// allows call sites to use getParentWithTrait<OpTrait::SymbolTable> instead
-// of getParentOfType<ModuleOp> to pass down the operation.
LLVM::LLVMFuncOp getNotalignedAllocFn(const LLVMTypeConverter *typeConverter,
- ModuleOp module, Type indexType) {
+ Operation *module, Type indexType) {
bool useGenericFn = typeConverter->getOptions().useGenericFunctions;
-
if (useGenericFn)
return LLVM::lookupOrCreateGenericAllocFn(module, indexType);
@@ -29,7 +25,7 @@ LLVM::LLVMFuncOp getNotalignedAllocFn(const LLVMTypeConverter *typeConverter,
}
LLVM::LLVMFuncOp getAlignedAllocFn(const LLVMTypeConverter *typeConverter,
- ModuleOp module, Type indexType) {
+ Operation *module, Type indexType) {
bool useGenericFn = typeConverter->getOptions().useGenericFunctions;
if (useGenericFn)
@@ -79,7 +75,8 @@ std::tuple<Value, Value> AllocationOpLLVMLowering::allocateBufferManuallyAlign(
// Allocate the underlying buffer.
Type elementPtrType = this->getElementPtrType(memRefType);
LLVM::LLVMFuncOp allocFuncOp = getNotalignedAllocFn(
- getTypeConverter(), op->getParentOfType<ModuleOp>(), getIndexType());
+ getTypeConverter(), op->getParentWithTrait<OpTrait::SymbolTable>(),
+ getIndexType());
auto results = rewriter.create<LLVM::CallOp>(loc, allocFuncOp, sizeBytes);
Value allocatedPtr =
@@ -144,7 +141,8 @@ Value AllocationOpLLVMLowering::allocateBufferAutoAlign(
Type elementPtrType = this->getElementPtrType(memRefType);
LLVM::LLVMFuncOp allocFuncOp = getAlignedAllocFn(
- getTypeConverter(), op->getParentOfType<ModuleOp>(), getIndexType());
+ getTypeConverter(), op->getParentWithTrait<OpTrait::SymbolTable>(),
+ getIndexType());
auto results = rewriter.create<LLVM::CallOp>(
loc, allocFuncOp, ValueRange({allocAlignment, sizeBytes}));
diff --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
index 0004c2e..88421a1 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
@@ -45,49 +45,53 @@ static constexpr llvm::StringRef kGenericFree = "_mlir_memref_to_llvm_free";
static constexpr llvm::StringRef kMemRefCopy = "memrefCopy";
/// Generic print function lookupOrCreate helper.
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(ModuleOp moduleOp, StringRef name,
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(Operation *moduleOp,
+ StringRef name,
ArrayRef<Type> paramTypes,
Type resultType, bool isVarArg) {
- auto func = moduleOp.lookupSymbol<LLVM::LLVMFuncOp>(name);
+ assert(moduleOp->hasTrait<OpTrait::SymbolTable>() &&
+ "expected SymbolTable operation");
+ auto func = llvm::dyn_cast_or_null<LLVM::LLVMFuncOp>(
+ SymbolTable::lookupSymbolIn(moduleOp, name));
if (func)
return func;
- OpBuilder b(moduleOp.getBodyRegion());
+ OpBuilder b(moduleOp->getRegion(0));
return b.create<LLVM::LLVMFuncOp>(
moduleOp->getLoc(), name,
LLVM::LLVMFunctionType::get(resultType, paramTypes, isVarArg));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintI64Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintI64Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintI64,
IntegerType::get(moduleOp->getContext(), 64),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintU64Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintU64Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintU64,
IntegerType::get(moduleOp->getContext(), 64),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF16Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF16Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintF16,
IntegerType::get(moduleOp->getContext(), 16), // bits!
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintBF16Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintBF16Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintBF16,
IntegerType::get(moduleOp->getContext(), 16), // bits!
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF32Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF32Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintF32,
Float32Type::get(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF64Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF64Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintF64,
Float64Type::get(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
@@ -103,72 +107,72 @@ static LLVM::LLVMPointerType getVoidPtr(MLIRContext *context) {
}
LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintStringFn(
- ModuleOp moduleOp, std::optional<StringRef> runtimeFunctionName) {
+ Operation *moduleOp, std::optional<StringRef> runtimeFunctionName) {
return lookupOrCreateFn(moduleOp, runtimeFunctionName.value_or(kPrintString),
getCharPtr(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintOpenFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintOpenFn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintOpen, {},
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintCloseFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintCloseFn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintClose, {},
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintCommaFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintCommaFn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintComma, {},
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintNewlineFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintNewlineFn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintNewline, {},
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateMallocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateMallocFn(Operation *moduleOp,
Type indexType) {
return LLVM::lookupOrCreateFn(moduleOp, kMalloc, indexType,
getVoidPtr(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateAlignedAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateAlignedAllocFn(Operation *moduleOp,
Type indexType) {
return LLVM::lookupOrCreateFn(moduleOp, kAlignedAlloc, {indexType, indexType},
getVoidPtr(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(Operation *moduleOp) {
return LLVM::lookupOrCreateFn(
moduleOp, kFree, getVoidPtr(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateGenericAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateGenericAllocFn(Operation *moduleOp,
Type indexType) {
return LLVM::lookupOrCreateFn(moduleOp, kGenericAlloc, indexType,
getVoidPtr(moduleOp->getContext()));
}
LLVM::LLVMFuncOp
-mlir::LLVM::lookupOrCreateGenericAlignedAllocFn(ModuleOp moduleOp,
+mlir::LLVM::lookupOrCreateGenericAlignedAllocFn(Operation *moduleOp,
Type indexType) {
return LLVM::lookupOrCreateFn(moduleOp, kGenericAlignedAlloc,
{indexType, indexType},
getVoidPtr(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateGenericFreeFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateGenericFreeFn(Operation *moduleOp) {
return LLVM::lookupOrCreateFn(
moduleOp, kGenericFree, getVoidPtr(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
LLVM::LLVMFuncOp
-mlir::LLVM::lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType,
+mlir::LLVM::lookupOrCreateMemRefCopyFn(Operation *moduleOp, Type indexType,
Type unrankedDescriptorType) {
return LLVM::lookupOrCreateFn(
moduleOp, kMemRefCopy,
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
index 5c6a32c..33016f8 100644
--- a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -127,7 +127,20 @@ void transform::ApplyReassociativeReshapeFoldingPatternsOp::populatePatterns(
void transform::ApplyRewriteTensorOpsAsConstantPatternsOp::populatePatterns(
RewritePatternSet &patterns) {
- tensor::populateRewriteAsConstantPatterns(patterns);
+ ControlFoldFn defaultControlFn = [](OpOperand *fusedOperand) {
+ Operation *producer = fusedOperand->get().getDefiningOp();
+ return producer && producer->hasOneUse();
+ };
+
+ ControlFoldFn aggressiveControlFn = [](OpOperand *fusedOperand) {
+ return true;
+ };
+
+ // Add folding with reshape by expansion patterns.
+ if (getAggressive())
+ tensor::populateRewriteAsConstantPatterns(patterns, aggressiveControlFn);
+ else
+ tensor::populateRewriteAsConstantPatterns(patterns, defaultControlFn);
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
index 5d6e3ec..c681cad 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
@@ -48,6 +48,34 @@ static LogicalResult isPackOn1D(RewriterBase &rewriter, Operation *op,
return success();
}
+// If the `linalgOp` represents a transpose, return the permutation vector for
+// the transpose. Otherwise, return failure.
+static FailureOr<SmallVector<int64_t>>
+getTransposeOpPermutation(linalg::LinalgOp linalgOp) {
+ if (auto transposeOp = dyn_cast<linalg::TransposeOp>(linalgOp.getOperation()))
+ return SmallVector<int64_t>(transposeOp.getPermutation());
+ if (linalgOp.getNumParallelLoops() != linalgOp.getNumLoops())
+ return failure();
+
+ if (linalgOp.getNumDpsInputs() != 1 || linalgOp.getNumDpsInits() != 1)
+ return failure();
+ auto mapRange = linalgOp.getIndexingMapsArray();
+ if (!mapRange.front().isPermutation() || !mapRange.back().isPermutation() ||
+ mapRange.front() == mapRange.back()) {
+ return failure();
+ }
+ if (!llvm::hasSingleElement(linalgOp.getBlock()->getOperations()))
+ return failure();
+ AffineMap outMap = mapRange.back();
+ AffineMap inMap = mapRange.front();
+ // To get the permutation, look at each output index and find which
+ // dimension in the input we're reading from for that index.
+ return llvm::map_to_vector(outMap.getResults(),
+ [&](AffineExpr expr) -> int64_t {
+ return *inMap.getResultPosition(expr);
+ });
+}
+
/// Packing one-dimensional tensor can be expressed as an expand shape op.
struct SimplifyPackToExpandShape : public OpRewritePattern<PackOp> {
using OpRewritePattern<PackOp>::OpRewritePattern;
@@ -246,14 +274,10 @@ static bool checkAndPermute(ArrayRef<int64_t> permutation,
for (unsigned int i = 0; i < rank; ++i) {
int64_t remappedPosition = permutation[i];
-
- if (!inVec.empty()) {
- if (remappedPosition >= rank) {
- return false;
- }
+ if (remappedPosition >= rank)
+ return false;
+ if (!inVec.empty())
remappedPosition = inVec[remappedPosition];
- }
-
resVec.push_back(remappedPosition);
}
@@ -263,20 +287,25 @@ static bool checkAndPermute(ArrayRef<int64_t> permutation,
/// Fold 'pack' -> 'transpose' into 'pack' since 'pack' already has transpose
/// semantics.
struct FoldProducerPackWithConsumerLinalgTransposeOp
- : public OpRewritePattern<linalg::TransposeOp> {
- using OpRewritePattern<linalg::TransposeOp>::OpRewritePattern;
+ : public OpInterfaceRewritePattern<linalg::LinalgOp> {
+ using OpInterfaceRewritePattern<linalg::LinalgOp>::OpInterfaceRewritePattern;
- LogicalResult matchAndRewrite(linalg::TransposeOp transposeOp,
+ LogicalResult matchAndRewrite(linalg::LinalgOp linalgOp,
PatternRewriter &rewriter) const override {
- auto packOp = transposeOp.getOperand(0).getDefiningOp<PackOp>();
+ auto packOp = linalgOp->getOperand(0).getDefiningOp<PackOp>();
if (!packOp)
return failure();
+ FailureOr<SmallVector<int64_t>> maybePerm =
+ getTransposeOpPermutation(linalgOp);
+ if (failed(maybePerm))
+ return failure();
+
auto innerDimsPos = packOp.getInnerDimsPos();
auto mixedInnerTiles = packOp.getMixedTiles();
auto outerDimsPerm = packOp.getOuterDimsPerm();
- auto transposePerm = transposeOp.getPermutation();
+ auto transposePerm = maybePerm.value();
SmallVector<int64_t> newOuterDimsPermVec;
SmallVector<int64_t> newInnerDimsPosVec;
SmallVector<OpFoldResult> newMixedInnerTilesVec;
@@ -285,7 +314,7 @@ struct FoldProducerPackWithConsumerLinalgTransposeOp
if (!checkAndPermute(transposePerm, outerDimsPerm, newOuterDimsPermVec,
srcRank))
return rewriter.notifyMatchFailure(
- transposeOp,
+ linalgOp,
"Cannot fold in tensor.pack if a tile dimension was transposed "
"with a non-tile dimension in linalg.transpose.");
@@ -297,11 +326,11 @@ struct FoldProducerPackWithConsumerLinalgTransposeOp
}
Value output = packOp.createDestinationTensor(
- rewriter, transposeOp.getLoc(), packOp.getSource(),
- newMixedInnerTilesVec, newInnerDimsPosVec, newOuterDimsPermVec);
+ rewriter, linalgOp.getLoc(), packOp.getSource(), newMixedInnerTilesVec,
+ newInnerDimsPosVec, newOuterDimsPermVec);
rewriter.replaceOpWithNewOp<PackOp>(
- transposeOp, packOp.getSource(), output, newInnerDimsPosVec,
+ linalgOp, packOp.getSource(), output, newInnerDimsPosVec,
newMixedInnerTilesVec, packOp.getPaddingValue(), newOuterDimsPermVec);
return success();
@@ -316,12 +345,16 @@ struct FoldConsumerPackWithProducerLinalgTransposeOp
LogicalResult matchAndRewrite(PackOp packOp,
PatternRewriter &rewriter) const override {
- auto transposeOp = packOp.getSource().getDefiningOp<linalg::TransposeOp>();
+ auto linalgOp = packOp.getSource().getDefiningOp<linalg::LinalgOp>();
+ if (!linalgOp)
+ return failure();
- if (!transposeOp)
+ FailureOr<SmallVector<int64_t>> maybePerm =
+ getTransposeOpPermutation(linalgOp);
+ if (failed(maybePerm))
return failure();
- auto transposePermutation = transposeOp.getPermutation();
+ auto transposePermutation = maybePerm.value();
auto outerDimsPerm = packOp.getOuterDimsPerm();
auto innerDimsPos = packOp.getInnerDimsPos();
SmallVector<int64_t> newInnerDimsPosVec;
@@ -337,11 +370,11 @@ struct FoldConsumerPackWithProducerLinalgTransposeOp
newInnerDimsPosVec.push_back(transposePermutation[dim]);
Value output = packOp.createDestinationTensor(
- rewriter, packOp.getLoc(), transposeOp.getOperand(0),
+ rewriter, packOp.getLoc(), linalgOp->getOperand(0),
packOp.getMixedTiles(), newInnerDimsPosVec, newOuterDimsPermVec);
rewriter.replaceOpWithNewOp<PackOp>(
- packOp, transposeOp.getOperand(0), output, newInnerDimsPosVec,
+ packOp, linalgOp->getOperand(0), output, newInnerDimsPosVec,
packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPermVec);
return success();
@@ -351,34 +384,38 @@ struct FoldConsumerPackWithProducerLinalgTransposeOp
/// Fold 'unpack' -> 'transpose' into 'unpack' since 'unpack' already has
/// transpose semantics.
struct FoldProducerUnPackWithConsumerLinalgTransposeOp
- : public OpRewritePattern<linalg::TransposeOp> {
- using OpRewritePattern<linalg::TransposeOp>::OpRewritePattern;
+ : public OpInterfaceRewritePattern<linalg::LinalgOp> {
+ using OpInterfaceRewritePattern<linalg::LinalgOp>::OpInterfaceRewritePattern;
- LogicalResult matchAndRewrite(linalg::TransposeOp transposeOp,
+ LogicalResult matchAndRewrite(linalg::LinalgOp linalgOp,
PatternRewriter &rewriter) const override {
- auto unPackOp = transposeOp.getOperand(0).getDefiningOp<UnPackOp>();
+ auto unPackOp = linalgOp->getOperand(0).getDefiningOp<UnPackOp>();
if (!unPackOp)
return failure();
- auto transposePermutation = transposeOp.getPermutation();
+ FailureOr<SmallVector<int64_t>> maybePerm =
+ getTransposeOpPermutation(linalgOp);
+ if (failed(maybePerm))
+ return failure();
+
auto outerDimsPerm = unPackOp.getOuterDimsPerm();
auto innerDimsPos = unPackOp.getInnerDimsPos();
SmallVector<int64_t> newInnerDimsPosVec;
SmallVector<int64_t> newOuterDimsPermVec =
- llvm::to_vector(transposePermutation);
-
- if (!outerDimsPerm.empty())
- applyPermutationToVector(newOuterDimsPermVec, outerDimsPerm);
+ invertPermutationVector(maybePerm.value());
// Can't use applyPermutationToVector for newInnerDimsPosVec since input and
// permutation rank won't necessarily be equal in all cases.
for (auto dim : innerDimsPos)
- newInnerDimsPosVec.push_back(transposePermutation[dim]);
+ newInnerDimsPosVec.push_back(newOuterDimsPermVec[dim]);
+
+ if (!outerDimsPerm.empty())
+ applyPermutationToVector(newOuterDimsPermVec, outerDimsPerm);
// Reuse the destination of the transpose op.
rewriter.replaceOpWithNewOp<UnPackOp>(
- transposeOp, unPackOp.getSource(), transposeOp.getDpsInits()[0],
+ linalgOp, unPackOp.getSource(), linalgOp.getDpsInits()[0],
newInnerDimsPosVec, unPackOp.getMixedTiles(), newOuterDimsPermVec);
return success();
@@ -393,13 +430,17 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp
LogicalResult matchAndRewrite(UnPackOp unPackOp,
PatternRewriter &rewriter) const override {
- auto transposeOp =
- unPackOp.getSource().getDefiningOp<linalg::TransposeOp>();
+ auto linalgOp = unPackOp.getSource().getDefiningOp<linalg::LinalgOp>();
+ if (!linalgOp)
+ return failure();
- if (!transposeOp)
+ FailureOr<SmallVector<int64_t>> maybePerm =
+ getTransposeOpPermutation(linalgOp);
+ if (failed(maybePerm))
return failure();
- auto transposePermutation = transposeOp.getPermutation();
+ SmallVector<int64_t> inverseTransposePerm =
+ invertPermutationVector(maybePerm.value());
auto outerDimsPerm = unPackOp.getOuterDimsPerm();
auto innerDimsPos = unPackOp.getInnerDimsPos();
int64_t destRank = unPackOp.getSourceRank() - innerDimsPos.size();
@@ -408,7 +449,7 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp
SmallVector<int64_t> newInnerDimsPosVec;
SmallVector<OpFoldResult> newMixedInnerTilesVec;
- if (!checkAndPermute(transposePermutation, outerDimsPerm,
+ if (!checkAndPermute(inverseTransposePerm, outerDimsPerm,
newOuterDimsPermVec, destRank))
return rewriter.notifyMatchFailure(
unPackOp,
@@ -416,18 +457,18 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp
"with a non-tile dimension in linalg.transpose.");
// Process transpose operation for tiled inner dimensions
- for (unsigned int i = destRank; i < transposePermutation.size(); ++i) {
- int64_t remappedPosition = transposePermutation[i] - destRank;
+ for (unsigned int i = destRank; i < inverseTransposePerm.size(); ++i) {
+ int64_t remappedPosition = inverseTransposePerm[i] - destRank;
newMixedInnerTilesVec.push_back(mixedInnerTilesVec[remappedPosition]);
newInnerDimsPosVec.push_back(innerDimsPos[remappedPosition]);
}
Value output = unPackOp.createDestinationTensor(
- rewriter, unPackOp.getLoc(), transposeOp.getOperand(0),
+ rewriter, unPackOp.getLoc(), linalgOp->getOperand(0),
newMixedInnerTilesVec, newInnerDimsPosVec, newOuterDimsPermVec);
rewriter.replaceOpWithNewOp<UnPackOp>(
- unPackOp, transposeOp.getOperand(0), output, newInnerDimsPosVec,
+ unPackOp, linalgOp->getOperand(0), output, newInnerDimsPosVec,
newMixedInnerTilesVec, newOuterDimsPermVec);
return success();
diff --git a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
index 11e1de5..7c9fced 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
@@ -8,9 +8,12 @@
//
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
+#include "llvm/ADT/TypeSwitch.h"
+
using namespace mlir;
using namespace mlir::tensor;
@@ -45,9 +48,169 @@ struct GenerateToConstant : public OpRewritePattern<GenerateOp> {
}
};
+/// Transform a linear index from one indexing space to another given:
+///
+/// - the shape of the source indexing space,
+/// - the strides of the target indexing space,
+/// - a linear index into the source indexing space.
+///
+/// This function is logically a sequence of linearize/delinearize over
+/// different bases but avoids allocating intermediate SmallVectors.
+int64_t transformIndexSpace(ArrayRef<int64_t> inputShape,
+ ArrayRef<int64_t> outputStrides,
+ int64_t srcLinearIndex) {
+ assert(inputShape.size() == outputStrides.size());
+
+ int64_t dstLinearIndex = 0;
+
+ for (int64_t dim = inputShape.size() - 1; dim >= 0; --dim) {
+ // Compute the index into the current dimension of the source tensor.
+ // `quotient` is the remaining linear index after accounting for the
+ // current dimension.
+ //
+ // `remainder` is the index into the source tensor for the current
+ // dimension.
+ auto [quotient, remainder] = std::div(srcLinearIndex, inputShape[dim]);
+
+ srcLinearIndex = quotient;
+
+ // Add the contribution of the current dimension to the output using the
+ // permutation map.
+ dstLinearIndex += outputStrides[dim] * remainder;
+ }
+
+ return dstLinearIndex;
+}
+
+template <typename ElemType, typename AttrType>
+Value constantFoldPadOp(PatternRewriter &rewriter, Location loc,
+ DenseElementsAttr input, AttrType padValue,
+ ArrayRef<int64_t> padLow, ArrayRef<int64_t> padHigh) {
+ auto inputValues = input.tryGetValues<ElemType>();
+ if (failed(inputValues))
+ return nullptr;
+
+ auto oldShape = input.getType().getShape();
+
+ // Compute the output shape of the new value.
+ auto newShape =
+ llvm::map_to_vector(llvm::zip(oldShape, padLow, padHigh),
+ [](std::tuple<int64_t, int64_t, int64_t> pack) {
+ auto [old, low, high] = pack;
+ return old + low + high;
+ });
+
+ int64_t outputSize = computeProduct(newShape);
+
+ // Fully initialize the vector with the padding value.
+ // The non-padded area will then be copied.
+ SmallVector<ElemType> values(outputSize, padValue.getValue());
+
+ // Strides for input and output are used to transform between the indexing
+ // space of the input and output tensors.
+ SmallVector<int64_t> outputStrides = computeStrides(newShape);
+
+ // The contribution of the low padding to the offset in the output tensor.
+ // This is the starting position of the source tensor within the padding
+ // tensor.
+ int64_t startingOffset = linearize(padLow, outputStrides);
+
+ // Copy values from the input tensor to the corresponding sub-region
+ // of the output tensor.
+ for (auto [inputIndex, inputValue] : llvm::enumerate(*inputValues)) {
+ auto outputIndex = transformIndexSpace(oldShape, outputStrides, inputIndex);
+ values[outputIndex + startingOffset] = inputValue;
+ }
+
+ // Create an attribute for the folded value.
+ auto newType = input.getType().clone(newShape);
+ auto newAttr = DenseElementsAttr::get(newType, values);
+
+ Operation *constantOp =
+ rewriter.getContext()
+ ->getLoadedDialect<TensorDialect>()
+ ->materializeConstant(rewriter, newAttr, newType, loc);
+
+ return constantOp ? constantOp->getResult(0) : nullptr;
+}
+
+struct PadOpToConstant final : public OpRewritePattern<PadOp> {
+
+ PadOpToConstant(MLIRContext *context, const ControlFoldFn &controlFn,
+ PatternBenefit benefit = 1)
+ : OpRewritePattern<PadOp>(context, benefit), controlFn{controlFn} {}
+
+ LogicalResult matchAndRewrite(PadOp padTensorOp,
+ PatternRewriter &rewriter) const override {
+ if (padTensorOp.getNofold())
+ return rewriter.notifyMatchFailure(
+ padTensorOp, "refusing to fold nofold pad operation");
+
+ TypedValue<RankedTensorType> input = padTensorOp.getSource();
+ RankedTensorType resultType = padTensorOp.getResult().getType();
+
+ DenseElementsAttr inputAttr = nullptr;
+ if (!matchPattern(input, m_Constant(&inputAttr)))
+ return failure();
+
+ Value paddingValue = padTensorOp.getConstantPaddingValue();
+
+ // Extract the constant value used for padding or bail out.
+ Attribute paddingAttr = nullptr;
+ if (!paddingValue || !matchPattern(paddingValue, m_Constant(&paddingAttr)))
+ return rewriter.notifyMatchFailure(padTensorOp,
+ "unable to get constant value");
+
+ // Try to extract the constant values of the low and high padding.
+ auto lowPad = getConstantIntValues(padTensorOp.getMixedLowPad());
+ auto highPad = getConstantIntValues(padTensorOp.getMixedHighPad());
+
+ // If the padding cannot be extracted, bail out.
+ if (!lowPad || !highPad)
+ return rewriter.notifyMatchFailure(padTensorOp,
+ "unable to extract constant padding");
+
+ // We have a potential candidate, consult the control function to
+ // determine if the op should fold.
+ if (!controlFn(&padTensorOp.getSourceMutable()))
+ return rewriter.notifyMatchFailure(padTensorOp,
+ "not folding due to cost function");
+
+ Location loc = padTensorOp.getLoc();
+
+ // Try constant folding the supported cases of integer and float values.
+ Value newOp =
+ llvm::TypeSwitch<Attribute, Value>(paddingAttr)
+ .Case([&](FloatAttr floatAttr) {
+ return constantFoldPadOp<llvm::APFloat>(
+ rewriter, loc, inputAttr, floatAttr, *lowPad, *highPad);
+ })
+ .Case([&](IntegerAttr integerAttr) {
+ return constantFoldPadOp<llvm::APInt>(
+ rewriter, loc, inputAttr, integerAttr, *lowPad, *highPad);
+ })
+ .Default(Value());
+
+ if (!newOp)
+ return rewriter.notifyMatchFailure(padTensorOp,
+ "tensor type not supported");
+
+ if (newOp.getType() != resultType)
+ newOp = rewriter.create<tensor::CastOp>(loc, resultType, newOp);
+
+ rewriter.replaceOp(padTensorOp, newOp);
+ return success();
+ }
+
+private:
+ ControlFoldFn controlFn;
+};
+
} // namespace
void mlir::tensor::populateRewriteAsConstantPatterns(
- RewritePatternSet &patterns) {
+ RewritePatternSet &patterns, const ControlFoldFn &controlFn) {
patterns.add<GenerateToConstant>(patterns.getContext());
+
+ patterns.add<PadOpToConstant>(patterns.getContext(), controlFn);
}
diff --git a/mlir/lib/Dialect/Utils/IndexingUtils.cpp b/mlir/lib/Dialect/Utils/IndexingUtils.cpp
index 4c96065..aba225b 100644
--- a/mlir/lib/Dialect/Utils/IndexingUtils.cpp
+++ b/mlir/lib/Dialect/Utils/IndexingUtils.cpp
@@ -92,7 +92,7 @@ int64_t mlir::computeProduct(ArrayRef<int64_t> basis) {
assert(llvm::all_of(basis, [](int64_t s) { return s > 0; }) &&
"basis must be nonnegative");
if (basis.empty())
- return 0;
+ return 1;
return std::accumulate(basis.begin(), basis.end(), 1,
std::multiplies<int64_t>());
}
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
index 997b56a..c131fde 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
@@ -505,25 +505,61 @@ static Value collapseInnerDims(PatternRewriter &rewriter, mlir::Location loc,
return rewriter.create<memref::CollapseShapeOp>(loc, input, reassociation);
}
-/// Checks that the indices corresponding to dimensions starting at
-/// `firstDimToCollapse` are constant 0, and writes to `outIndices`
-/// the truncated indices where `firstDimToCollapse` is now the innermost dim.
-/// TODO: Extract the logic that writes to outIndices so that this method
-/// simply checks one pre-condition.
-static LogicalResult
-checkAndCollapseInnerZeroIndices(ValueRange indices, int64_t firstDimToCollapse,
- SmallVector<Value> &outIndices) {
- int64_t rank = indices.size();
- if (firstDimToCollapse >= rank)
- return failure();
- for (int64_t i = firstDimToCollapse; i < rank; ++i) {
- std::optional<int64_t> cst = getConstantIntValue(indices[i]);
- if (!cst || cst.value() != 0)
- return failure();
+/// Returns the new indices that collapses the inner dimensions starting from
+/// the `firstDimToCollapse` dimension.
+static SmallVector<Value> getCollapsedIndices(RewriterBase &rewriter,
+ Location loc,
+ ArrayRef<int64_t> shape,
+ ValueRange indices,
+ int64_t firstDimToCollapse) {
+ assert(firstDimToCollapse < static_cast<int64_t>(indices.size()));
+
+ // If all the collapsed indices are zero then no extra logic is needed.
+ // Otherwise, a new offset/index has to be computed.
+ SmallVector<Value> indicesAfterCollapsing(
+ indices.begin(), indices.begin() + firstDimToCollapse);
+ SmallVector<Value> indicesToCollapse(indices.begin() + firstDimToCollapse,
+ indices.end());
+ if (llvm::all_of(indicesToCollapse, isZeroIndex)) {
+ indicesAfterCollapsing.push_back(indicesToCollapse[0]);
+ return indicesAfterCollapsing;
+ }
+
+ // Compute the remaining trailing index/offset required for reading from
+ // the collapsed memref:
+ //
+ // offset = 0
+ // for (i = firstDimToCollapse; i < outputRank; ++i)
+ // offset += sourceType.getDimSize(i) * transferReadOp.indices[i]
+ //
+ // For this example:
+ // %2 = vector.transfer_read/write %arg4[%c0, %arg0, %c0] (...) :
+ // memref<1x43x2xi32>, vector<1x2xi32>
+ // which would be collapsed to:
+ // %1 = vector.transfer_read/write %collapse_shape[%c0, %offset] (...) :
+ // memref<1x86xi32>, vector<2xi32>
+ // one would get the following offset:
+ // %offset = %arg0 * 43
+ OpFoldResult collapsedOffset =
+ rewriter.create<arith::ConstantIndexOp>(loc, 0).getResult();
+
+ auto collapsedStrides = computeSuffixProduct(
+ ArrayRef<int64_t>(shape.begin() + firstDimToCollapse, shape.end()));
+
+ // Compute the collapsed offset.
+ auto &&[collapsedExpr, collapsedVals] =
+ computeLinearIndex(collapsedOffset, collapsedStrides, indicesToCollapse);
+ collapsedOffset = affine::makeComposedFoldedAffineApply(
+ rewriter, loc, collapsedExpr, collapsedVals);
+
+ if (collapsedOffset.is<Value>()) {
+ indicesAfterCollapsing.push_back(collapsedOffset.get<Value>());
+ } else {
+ indicesAfterCollapsing.push_back(rewriter.create<arith::ConstantIndexOp>(
+ loc, *getConstantIntValue(collapsedOffset)));
}
- outIndices = indices;
- outIndices.resize(firstDimToCollapse + 1);
- return success();
+
+ return indicesAfterCollapsing;
}
namespace {
@@ -594,54 +630,9 @@ public:
AffineMap::get(collapsedRank, 0, dimExprs, rewriter.getContext());
// 2.2 New indices
- // If all the collapsed indices are zero then no extra logic is needed.
- // Otherwise, a new offset/index has to be computed.
- SmallVector<Value> collapsedIndices;
- if (failed(checkAndCollapseInnerZeroIndices(transferReadOp.getIndices(),
- firstDimToCollapse,
- collapsedIndices))) {
- // Copy all the leading indices.
- SmallVector<Value> indices = transferReadOp.getIndices();
- collapsedIndices.append(indices.begin(),
- indices.begin() + firstDimToCollapse);
-
- // Compute the remaining trailing index/offset required for reading from
- // the collapsed memref:
- //
- // offset = 0
- // for (i = firstDimToCollapse; i < outputRank; ++i)
- // offset += sourceType.getDimSize(i) * transferReadOp.indices[i]
- //
- // For this example:
- // %2 = vector.transfer_read %arg4[%c0, %arg0, %c0] (...) :
- // memref<1x43x2xi32>, vector<1x2xi32>
- // which would be collapsed to:
- // %1 = vector.transfer_read %collapse_shape[%c0, %offset] (...) :
- // memref<1x86xi32>, vector<2xi32>
- // one would get the following offset:
- // %offset = %arg0 * 43
- OpFoldResult collapsedOffset =
- rewriter.create<arith::ConstantIndexOp>(loc, 0).getResult();
-
- auto sourceShape = sourceType.getShape();
- auto collapsedStrides = computeSuffixProduct(ArrayRef<int64_t>(
- sourceShape.begin() + firstDimToCollapse, sourceShape.end()));
-
- // Compute the collapsed offset.
- ArrayRef<Value> indicesToCollapse(indices.begin() + firstDimToCollapse,
- indices.end());
- auto &&[collapsedExpr, collapsedVals] = computeLinearIndex(
- collapsedOffset, collapsedStrides, indicesToCollapse);
- collapsedOffset = affine::makeComposedFoldedAffineApply(
- rewriter, loc, collapsedExpr, collapsedVals);
-
- if (collapsedOffset.is<Value>()) {
- collapsedIndices.push_back(collapsedOffset.get<Value>());
- } else {
- collapsedIndices.push_back(rewriter.create<arith::ConstantIndexOp>(
- loc, *getConstantIntValue(collapsedOffset)));
- }
- }
+ SmallVector<Value> collapsedIndices =
+ getCollapsedIndices(rewriter, loc, sourceType.getShape(),
+ transferReadOp.getIndices(), firstDimToCollapse);
// 3. Create new vector.transfer_read that reads from the collapsed memref
VectorType flatVectorType = VectorType::get({vectorType.getNumElements()},
@@ -697,8 +688,7 @@ public:
return failure();
if (!vector::isContiguousSlice(sourceType, vectorType))
return failure();
- int64_t firstContiguousInnerDim =
- sourceType.getRank() - vectorType.getRank();
+ int64_t firstDimToCollapse = sourceType.getRank() - vectorType.getRank();
// TODO: generalize this pattern, relax the requirements here.
if (transferWriteOp.hasOutOfBoundsDim())
return failure();
@@ -706,22 +696,23 @@ public:
return failure();
if (transferWriteOp.getMask())
return failure();
- SmallVector<Value> collapsedIndices;
- if (failed(checkAndCollapseInnerZeroIndices(transferWriteOp.getIndices(),
- firstContiguousInnerDim,
- collapsedIndices)))
- return failure();
+
+ SmallVector<Value> collapsedIndices =
+ getCollapsedIndices(rewriter, loc, sourceType.getShape(),
+ transferWriteOp.getIndices(), firstDimToCollapse);
Value collapsedSource =
- collapseInnerDims(rewriter, loc, source, firstContiguousInnerDim);
+ collapseInnerDims(rewriter, loc, source, firstDimToCollapse);
MemRefType collapsedSourceType =
cast<MemRefType>(collapsedSource.getType());
int64_t collapsedRank = collapsedSourceType.getRank();
- assert(collapsedRank == firstContiguousInnerDim + 1);
+ assert(collapsedRank == firstDimToCollapse + 1);
+
SmallVector<AffineExpr, 1> dimExprs{
- getAffineDimExpr(firstContiguousInnerDim, rewriter.getContext())};
+ getAffineDimExpr(firstDimToCollapse, rewriter.getContext())};
auto collapsedMap =
AffineMap::get(collapsedRank, 0, dimExprs, rewriter.getContext());
+
VectorType flatVectorType = VectorType::get({vectorType.getNumElements()},
vectorType.getElementType());
Value flatVector =
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 2295922..5ef47fb 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -110,7 +110,7 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
dispatchIndexOpFoldResults(shape, dynamicShape, staticShape);
- dispatchIndexOpFoldResults(strides, dynamicStrides, staticOffsets);
+ dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
auto staticShapeAttr = builder.getDenseI64ArrayAttr(staticShape);
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 29e3621..6a362af 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -2061,7 +2061,8 @@ void AsmPrinter::Impl::printLocationInternal(LocationAttr loc, bool pretty,
/// Print a floating point value in a way that the parser will be able to
/// round-trip losslessly.
-static void printFloatValue(const APFloat &apValue, raw_ostream &os) {
+static void printFloatValue(const APFloat &apValue, raw_ostream &os,
+ bool *printedHex = nullptr) {
// We would like to output the FP constant value in exponential notation,
// but we cannot do this if doing so will lose precision. Check here to
// make sure that we only output it in exponential format if we can parse
@@ -2102,6 +2103,8 @@ static void printFloatValue(const APFloat &apValue, raw_ostream &os) {
// Print special values in hexadecimal format. The sign bit should be included
// in the literal.
+ if (printedHex)
+ *printedHex = true;
SmallVector<char, 16> str;
APInt apInt = apValue.bitcastToAPInt();
apInt.toString(str, /*Radix=*/16, /*Signed=*/false,
@@ -2275,10 +2278,12 @@ void AsmPrinter::Impl::printAttributeImpl(Attribute attr,
return;
} else if (auto floatAttr = llvm::dyn_cast<FloatAttr>(attr)) {
- printFloatValue(floatAttr.getValue(), os);
+ bool printedHex = false;
+ printFloatValue(floatAttr.getValue(), os, &printedHex);
// FloatAttr elides the type if F64.
- if (typeElision == AttrTypeElision::May && floatAttr.getType().isF64())
+ if (typeElision == AttrTypeElision::May && floatAttr.getType().isF64() &&
+ !printedHex)
return;
} else if (auto strAttr = llvm::dyn_cast<StringAttr>(attr)) {
diff --git a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
index 9a3143f5..629a4c2 100644
--- a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
+++ b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
@@ -636,3 +636,142 @@ func.func @tensor_padded_unpack_linalg_transpose_fold(%arg0: tensor<71x7x4x16x16
// CHECK-SAME: into %[[OUT:.+]] : tensor<71x7x4x16x16xf32> -> tensor<100x71x64xf32>
// CHECK: return %[[UNPACK]] : tensor<100x71x64xf32>
// CHECK: }
+
+// -----
+
+func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
+ %0 = tensor.empty() : tensor<5x2x3x16x4xi32>
+ %transposed = linalg.transpose ins(%arg0 : tensor<2x3x5x4x16xi32>)
+ outs(%0 : tensor<5x2x3x16x4xi32>)
+ permutation = [2, 0, 1, 4, 3]
+ %1 = tensor.empty() : tensor<5x48x8xi32>
+ %unpack = tensor.unpack %transposed
+ outer_dims_perm = [0, 2, 1]
+ inner_dims_pos = [1, 2]
+ inner_tiles = [16, 4] into
+ %1 : tensor<5x2x3x16x4xi32> -> tensor<5x48x8xi32>
+ return %unpack : tensor<5x48x8xi32>
+}
+//CHECK-LABEL: func.func @non_involution_transpose_unpack_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
+// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32>
+// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [2, 1, 0]
+// CHECK-SAME: inner_dims_pos = [2, 1]
+// CHECK-SAME: inner_tiles = [4, 16]
+// CHECK-SAME:      into %[[OUT]] : tensor<2x3x5x4x16xi32> -> tensor<5x48x8xi32>
+// CHECK: return %[[UNPACK]] : tensor<5x48x8xi32>
+// CHECK: }
+
+// -----
+
+func.func @unpack_non_involution_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
+ %0 = tensor.empty() : tensor<3x56x3648xf32>
+ %unpack = tensor.unpack %arg0
+ outer_dims_perm = [2, 0, 1]
+ inner_dims_pos = [1, 2]
+ inner_tiles = [1, 64]
+ into %0 : tensor<57x3x56x1x64xf32> -> tensor<3x56x3648xf32>
+
+ %1 = tensor.empty() : tensor<3648x3x56xf32>
+ %transposed = linalg.transpose
+ ins(%unpack : tensor<3x56x3648xf32>)
+ outs(%1 : tensor<3648x3x56xf32>)
+ permutation = [2, 0, 1]
+ return %transposed : tensor<3648x3x56xf32>
+}
+// CHECK-LABEL: func.func @unpack_non_involution_transpose_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
+// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32>
+// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [0, 1, 2]
+// CHECK-SAME: inner_dims_pos = [2, 0]
+// CHECK-SAME: inner_tiles = [1, 64]
+// CHECK-SAME: into %[[OUT:.+]] : tensor<57x3x56x1x64xf32> -> tensor<3648x3x56xf32>
+// CHECK: return %[[UNPACK]] : tensor<3648x3x56xf32>
+// CHECK: }
+
+// -----
+
+func.func @transpose_unpacked_dims_no_fold(%arg0: tensor<2x16x5x4x3xi32>) -> tensor<5x32x12xi32> {
+ %0 = tensor.empty() : tensor<5x2x3x16x4xi32>
+ %transposed = linalg.transpose ins(%arg0 : tensor<2x16x5x4x3xi32>)
+ outs(%0 : tensor<5x2x3x16x4xi32>)
+ permutation = [2, 0, 4, 1, 3]
+ %1 = tensor.empty() : tensor<5x32x12xi32>
+ %unpack = tensor.unpack %transposed
+ inner_dims_pos = [1, 2]
+ inner_tiles = [16, 4] into
+ %1 : tensor<5x2x3x16x4xi32> -> tensor<5x32x12xi32>
+ return %unpack : tensor<5x32x12xi32>
+}
+//CHECK-LABEL: func.func @transpose_unpacked_dims_no_fold(
+// CHECK: linalg.transpose
+// CHECK: tensor.unpack
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3, d4)->(d1, d2, d0, d4, d3)>
+#map1 = affine_map<(d0, d1, d2, d3, d4)->(d0, d1, d2, d3, d4)>
+func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
+ %0 = tensor.empty() : tensor<5x2x3x16x4xi32>
+ %transposed = linalg.generic {
+ iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
+ indexing_maps = [#map, #map1]}
+ ins(%arg0 : tensor<2x3x5x4x16xi32>)
+ outs(%0 : tensor<5x2x3x16x4xi32>) {
+ ^bb0(%in : i32, %out : i32):
+ linalg.yield %in : i32
+ } -> tensor<5x2x3x16x4xi32>
+ %1 = tensor.empty() : tensor<5x48x8xi32>
+ %unpack = tensor.unpack %transposed
+ outer_dims_perm = [0, 2, 1]
+ inner_dims_pos = [1, 2]
+ inner_tiles = [16, 4] into
+ %1 : tensor<5x2x3x16x4xi32> -> tensor<5x48x8xi32>
+ return %unpack : tensor<5x48x8xi32>
+}
+//CHECK-LABEL: func.func @generic_transpose_unpack_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
+// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32>
+// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [2, 1, 0]
+// CHECK-SAME: inner_dims_pos = [2, 1]
+// CHECK-SAME: inner_tiles = [4, 16]
+// CHECK-SAME:      into %[[OUT]] : tensor<2x3x5x4x16xi32> -> tensor<5x48x8xi32>
+// CHECK: return %[[UNPACK]] : tensor<5x48x8xi32>
+// CHECK: }
+
+// -----
+
+#map = affine_map<(d0, d1, d2)->(d1, d2, d0)>
+#map1 = affine_map<(d0, d1, d2)->(d0, d1, d2)>
+func.func @unpack_generic_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
+ %0 = tensor.empty() : tensor<3x56x3648xf32>
+ %unpack = tensor.unpack %arg0
+ outer_dims_perm = [2, 0, 1]
+ inner_dims_pos = [1, 2]
+ inner_tiles = [1, 64]
+ into %0 : tensor<57x3x56x1x64xf32> -> tensor<3x56x3648xf32>
+
+ %1 = tensor.empty() : tensor<3648x3x56xf32>
+ %transposed = linalg.generic {
+ iterator_types = ["parallel", "parallel", "parallel"],
+ indexing_maps = [#map, #map1]}
+ ins(%unpack : tensor<3x56x3648xf32>)
+ outs(%1 : tensor<3648x3x56xf32>) {
+ ^bb0(%in : f32, %out : f32):
+ linalg.yield %in : f32
+ } -> tensor<3648x3x56xf32>
+ return %transposed : tensor<3648x3x56xf32>
+}
+// CHECK-LABEL: func.func @unpack_generic_transpose_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
+// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32>
+// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [0, 1, 2]
+// CHECK-SAME: inner_dims_pos = [2, 0]
+// CHECK-SAME: inner_tiles = [1, 64]
+// CHECK-SAME: into %[[OUT:.+]] : tensor<57x3x56x1x64xf32> -> tensor<3648x3x56xf32>
+// CHECK: return %[[UNPACK]] : tensor<3648x3x56xf32>
+// CHECK: }
diff --git a/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir b/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
index 1a1cf9e..35ee6f1 100644
--- a/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
+++ b/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
@@ -21,3 +21,138 @@ func.func @tensor_generate_constant() -> tensor<2x3x5xf32> {
} : tensor<2x3x5xf32>
return %0 : tensor<2x3x5xf32>
}
+
+// CHECK-LABEL: func @pad_of_ints(
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [0, 0, 0, 0],
+// CHECK-SAME{LITERAL}: [0, 6, 7, 0],
+// CHECK-SAME{LITERAL}: [0, 8, 9, 0],
+// CHECK-SAME{LITERAL}: [0, 0, 0, 0]
+// CHECK-SAME{LITERAL}: ]> : tensor<4x4xi32>
+// CHECK: %[[cast:.*]] = tensor.cast %[[cst]] : tensor<4x4xi32> to tensor<?x?xi32>
+// CHECK: return %[[cast]]
+func.func @pad_of_ints() -> tensor<?x?xi32> {
+ %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %c1 = arith.constant 1 : index
+
+ %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<?x?xi32>
+
+ return %0 : tensor<?x?xi32>
+}
+
+// CHECK-LABEL: func @pad_of_floats(
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}: [0.000000e+00, 6.000000e+00, 7.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}: [0.000000e+00, 8.000000e+00, 9.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}: [0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00]
+// CHECK-SAME{LITERAL}: ]> : tensor<4x4xf32>
+// CHECK: return %[[cst]]
+
+func.func @pad_of_floats() -> tensor<4x4xf32> {
+ %init = arith.constant dense<[[6.0, 7.0], [8.0, 9.0]]> : tensor<2x2xf32>
+ %pad_value = arith.constant 0.0 : f32
+
+ %0 = tensor.pad %init low[1, 1] high[1, 1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : f32
+ } : tensor<2x2xf32> to tensor<4x4xf32>
+
+ return %0 : tensor<4x4xf32>
+}
+
+// CHECK-LABEL: func @pad_of_ints_no_low_dims(
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [6, 7, 0],
+// CHECK-SAME{LITERAL}: [8, 9, 0],
+// CHECK-SAME{LITERAL}: [0, 0, 0]
+// CHECK-SAME{LITERAL}: ]> : tensor<3x3xi32>
+// CHECK: return %[[cst]]
+func.func @pad_of_ints_no_low_dims() -> tensor<3x3xi32> {
+ %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %0 = tensor.pad %init low[0, 0] high[1, 1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<3x3xi32>
+
+ return %0 : tensor<3x3xi32>
+}
+
+// CHECK-LABEL: func @pad_of_ints_no_high_dims(
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [0, 0, 0],
+// CHECK-SAME{LITERAL}: [0, 6, 7],
+// CHECK-SAME{LITERAL}: [0, 8, 9]
+// CHECK-SAME{LITERAL}: ]> : tensor<3x3xi32>
+// CHECK: return %[[cst]]
+func.func @pad_of_ints_no_high_dims() -> tensor<3x3xi32> {
+ %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %0 = tensor.pad %init low[1, 1] high[0, 0] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<3x3xi32>
+
+ return %0 : tensor<3x3xi32>
+}
+
+// CHECK-LABEL: func @pad_multi_use_do_not_fold(
+// CHECK: %[[pad:.+]] = tensor.pad
+// CHECK: return %[[pad]]
+func.func @pad_multi_use_do_not_fold() -> (tensor<?x?xi32>, tensor<2x2xi32>) {
+ %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %c1 = arith.constant 1 : index
+
+ %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<?x?xi32>
+
+ return %0, %init : tensor<?x?xi32>, tensor<2x2xi32>
+}
+
+// -----
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func">
+ transform.apply_patterns to %func_op {
+ transform.apply_patterns.tensor.rewrite_as_constant aggressive
+ } : !transform.op<"func.func">
+ transform.yield
+ }
+}
+
+// CHECK-LABEL: func @pad_aggressive_fold(
+// CHECK: %[[init:.*]] = arith.constant dense<7> : tensor<2x2xi32>
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [0, 0, 0, 0],
+// CHECK-SAME{LITERAL}: [0, 7, 7, 0],
+// CHECK-SAME{LITERAL}: [0, 7, 7, 0],
+// CHECK-SAME{LITERAL}: [0, 0, 0, 0]
+// CHECK-SAME{LITERAL}: ]> : tensor<4x4xi32>
+// CHECK: %[[cast:.*]] = tensor.cast %[[cst]] : tensor<4x4xi32> to tensor<?x?xi32>
+// CHECK: return %[[cast]]
+func.func @pad_aggressive_fold() -> (tensor<?x?xi32>, tensor<2x2xi32>) {
+ %init = arith.constant dense<7> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %c1 = arith.constant 1 : index
+
+ %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<?x?xi32>
+
+ return %0, %init : tensor<?x?xi32>, tensor<2x2xi32>
+}
diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index 788ae9a..65bf0b9 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -471,16 +471,16 @@ func.func @regression_non_contiguous_dim_read(%subview : memref<1x3x3x2xf32, str
}
// CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 * 2)>
-// CHECK-LABEL: func.func @regression_non_contiguous_dim_read(
-// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>> into memref<1x3x6xf32, strided<[40, 10, 1], offset: ?>>
-// CHECK: %[[APPLY:.*]] = affine.apply #[[$MAP]]()
+// CHECK-LABEL: func.func @regression_non_contiguous_dim_read(
+// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>> into memref<1x3x6xf32, strided<[40, 10, 1], offset: ?>>
+// CHECK: %[[APPLY:.*]] = affine.apply #[[$MAP]]()
// CHECK-128B-LABEL: func @regression_non_contiguous_dim_read(
// CHECK-128B: memref.collapse_shape
// -----
-func.func @unsupported_non_contiguous_dim_write(%value : vector<2x2xf32>,
+func.func @regression_non_contiguous_dim_write(%value : vector<2x2xf32>,
%subview : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>,
%idx0 : index, %idx1 : index) {
%c0 = arith.constant 0 : index
@@ -488,8 +488,35 @@ func.func @unsupported_non_contiguous_dim_write(%value : vector<2x2xf32>,
return
}
-// CHECK-LABEL: func.func @unsupported_non_contiguous_dim_write(
-// CHECK-NOT: memref.collapse_shape
+// CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 * 2)>
+// CHECK-LABEL: func.func @regression_non_contiguous_dim_write(
+// CHECK: %[[APPLY:.*]] = affine.apply #[[$MAP]]()
+// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>> into memref<1x3x6xf32, strided<[40, 10, 1], offset: ?>>
-// CHECK-128B-LABEL: func @unsupported_non_contiguous_dim_write(
-// CHECK-128B-NOT: memref.collapse_shape
+// CHECK-128B-LABEL: func @regression_non_contiguous_dim_write(
+// CHECK-128B: memref.collapse_shape
+
+// -----
+
+func.func @negative_out_of_bound_transfer_read(
+ %arg : memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<5x4x3x2xi8> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0 : i8
+ %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst {in_bounds = [false, true, true, true]} :
+ memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<5x4x3x2xi8>
+ return %v : vector<5x4x3x2xi8>
+}
+// CHECK: func.func @negative_out_of_bound_transfer_read
+// CHECK-NOT: memref.collapse_shape
+
+// -----
+
+func.func @negative_out_of_bound_transfer_write(
+ %arg : memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, %vec : vector<1x1x3x2xi8>) {
+ %c0 = arith.constant 0 : index
+ vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] {in_bounds = [false, true, true, true]} :
+ vector<1x1x3x2xi8>, memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>
+ return
+}
+// CHECK: func.func @negative_out_of_bound_transfer_write
+// CHECK-NOT: memref.collapse_shape
diff --git a/mlir/test/IR/array-of-attr.mlir b/mlir/test/IR/array-of-attr.mlir
index 1b6fe55..c2a6075 100644
--- a/mlir/test/IR/array-of-attr.mlir
+++ b/mlir/test/IR/array-of-attr.mlir
@@ -12,3 +12,7 @@ test.array_of_attr_op
// CHECK: test.array_of_attr_op
// CHECK-SAME: a = [], b = [], c = []
test.array_of_attr_op a = [], b = [], c = []
+
+// CHECK: "test.test_array_float"
+// CHECK-SAME: 1.000000e+00 : f32, 1.000000e+00, 0x7FF0000000000000 : f64
+"test.test_array_float"() {test.float_arr = [1.0 : f32, 1.0 : f64, 0x7FF0000000000000 : f64]} : () -> ()
diff --git a/mlir/test/python/ir/blocks.py b/mlir/test/python/ir/blocks.py
index 8b4d946..70ccaee 100644
--- a/mlir/test/python/ir/blocks.py
+++ b/mlir/test/python/ir/blocks.py
@@ -145,3 +145,35 @@ def testBlockHash():
block1 = Block.create_at_start(dummy.operation.regions[0], [f32])
block2 = Block.create_at_start(dummy.operation.regions[0], [f32])
assert hash(block1) != hash(block2)
+
+
+# CHECK-LABEL: TEST: testBlockAddArgs
+@run
+def testBlockAddArgs():
+ with Context() as ctx, Location.unknown(ctx) as loc:
+ ctx.allow_unregistered_dialects = True
+ f32 = F32Type.get()
+ op = Operation.create("test", regions=1, loc=Location.unknown())
+ blocks = op.regions[0].blocks
+ blocks.append()
+ # CHECK: ^bb0:
+ op.print(enable_debug_info=True)
+ blocks[0].add_argument(f32, loc)
+ # CHECK: ^bb0(%{{.+}}: f32 loc(unknown)):
+ op.print(enable_debug_info=True)
+
+
+# CHECK-LABEL: TEST: testBlockEraseArgs
+@run
+def testBlockEraseArgs():
+ with Context() as ctx, Location.unknown(ctx) as loc:
+ ctx.allow_unregistered_dialects = True
+ f32 = F32Type.get()
+ op = Operation.create("test", regions=1, loc=Location.unknown())
+ blocks = op.regions[0].blocks
+ blocks.append(f32)
+ # CHECK: ^bb0(%{{.+}}: f32 loc(unknown)):
+ op.print(enable_debug_info=True)
+ blocks[0].erase_argument(0)
+ # CHECK: ^bb0:
+ op.print(enable_debug_info=True)
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index b3560036..d88430a 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -46,7 +46,7 @@ set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx1010"
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
- "gfx1151")
+ "gfx1151;gfx1152")
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
set(all_gpu_architectures
diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h
index 1d6804d..fce2adc 100644
--- a/offload/include/PluginManager.h
+++ b/offload/include/PluginManager.h
@@ -64,10 +64,6 @@ struct PluginManager {
std::make_unique<DeviceImageTy>(TgtBinDesc, TgtDeviceImage));
}
- /// Initialize as many devices as possible for this plugin. Devices that fail
- /// to initialize are ignored.
- void initDevices(GenericPluginTy &RTL);
-
/// Return the device presented to the user as device \p DeviceNo if it is
/// initialized and ready. Otherwise return an error explaining the problem.
llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo);
@@ -117,20 +113,31 @@ struct PluginManager {
return Devices.getExclusiveAccessor();
}
- int getNumUsedPlugins() const { return DeviceOffsets.size(); }
-
// Initialize all plugins.
void initAllPlugins();
/// Iterator range for all plugins (in use or not, but always valid).
auto plugins() { return llvm::make_pointee_range(Plugins); }
+ /// Iterator range for all plugins (in use or not, but always valid).
+ auto plugins() const { return llvm::make_pointee_range(Plugins); }
+
/// Return the user provided requirements.
int64_t getRequirements() const { return Requirements.getRequirements(); }
/// Add \p Flags to the user provided requirements.
void addRequirements(int64_t Flags) { Requirements.addRequirements(Flags); }
+ /// Returns the number of plugins that are active.
+ int getNumActivePlugins() const {
+ int count = 0;
+ for (auto &R : plugins())
+ if (R.is_initialized())
+ ++count;
+
+ return count;
+ }
+
private:
bool RTLsLoaded = false;
llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc;
@@ -138,11 +145,9 @@ private:
// List of all plugins, in use or not.
llvm::SmallVector<std::unique_ptr<GenericPluginTy>> Plugins;
- // Mapping of plugins to offsets in the device table.
- llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceOffsets;
-
- // Mapping of plugins to the number of used devices.
- llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceUsed;
+ // Mapping of plugins to the OpenMP device identifier.
+ llvm::DenseMap<std::pair<const GenericPluginTy *, int32_t>, int32_t>
+ DeviceIds;
// Set of all device images currently in use.
llvm::DenseSet<const __tgt_device_image *> UsedImages;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c6dd954..663cfdc 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3163,25 +3163,24 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }
/// Check whether the image is compatible with an AMDGPU device.
- Expected<bool> isELFCompatible(StringRef Image) const override {
+ Expected<bool> isELFCompatible(uint32_t DeviceId,
+ StringRef Image) const override {
// Get the associated architecture and flags from the ELF.
auto ElfOrErr = ELF64LEObjectFile::create(
MemoryBufferRef(Image, /*Identifier=*/""), /*InitContent=*/false);
if (!ElfOrErr)
return ElfOrErr.takeError();
std::optional<StringRef> Processor = ElfOrErr->tryGetCPUName();
+ if (!Processor)
+ return false;
- for (hsa_agent_t Agent : KernelAgents) {
- auto TargeTripleAndFeaturesOrError =
- utils::getTargetTripleAndFeatures(Agent);
- if (!TargeTripleAndFeaturesOrError)
- return TargeTripleAndFeaturesOrError.takeError();
- if (!utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
+ auto TargeTripleAndFeaturesOrError =
+ utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
+ if (!TargeTripleAndFeaturesOrError)
+ return TargeTripleAndFeaturesOrError.takeError();
+ return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
ElfOrErr->getPlatformFlags(),
- *TargeTripleAndFeaturesOrError))
- return false;
- }
- return true;
+ *TargeTripleAndFeaturesOrError);
}
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
@@ -3273,19 +3272,13 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (ArgsSize < KernelArgsSize)
return Plugin::error("Mismatch of kernel arguments size");
- // The args size reported by HSA may or may not contain the implicit args.
- // For now, assume that HSA does not consider the implicit arguments when
- // reporting the arguments of a kernel. In the worst case, we can waste
- // 56 bytes per allocation.
- uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;
-
AMDGPUPluginTy &AMDGPUPlugin =
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
void *AllArgs = nullptr;
- if (auto Err = ArgsMemoryManager.allocate(AllArgsSize, &AllArgs))
+ if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
return Err;
// Account for user requested dynamic shared memory.
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index eda6a4f..88423be 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -993,11 +993,11 @@ struct GenericPluginTy {
/// Get the number of active devices.
int32_t getNumDevices() const { return NumDevices; }
- /// Get the plugin-specific device identifier offset.
- int32_t getDeviceIdStartIndex() const { return DeviceIdStartIndex; }
-
- /// Set the plugin-specific device identifier offset.
- void setDeviceIdStartIndex(int32_t Offset) { DeviceIdStartIndex = Offset; }
+ /// Get the plugin-specific device identifier.
+ int32_t getUserId(int32_t DeviceId) const {
+ assert(UserDeviceIds.contains(DeviceId) && "No user-id registered");
+ return UserDeviceIds.at(DeviceId);
+ }
/// Get the ELF code to recognize the binary image of this plugin.
virtual uint16_t getMagicElfBits() const = 0;
@@ -1059,7 +1059,8 @@ struct GenericPluginTy {
/// Indicate if an image is compatible with the plugin devices. Notice that
/// this function may be called before actually initializing the devices. So
/// we could not move this function into GenericDeviceTy.
- virtual Expected<bool> isELFCompatible(StringRef Image) const = 0;
+ virtual Expected<bool> isELFCompatible(uint32_t DeviceID,
+ StringRef Image) const = 0;
protected:
/// Indicate whether a device id is valid.
@@ -1070,11 +1071,18 @@ protected:
public:
// TODO: This plugin interface needs to be cleaned up.
- /// Returns true if the plugin has been initialized.
+ /// Returns non-zero if the plugin runtime has been initialized.
int32_t is_initialized() const;
- /// Returns non-zero if the provided \p Image can be executed by the runtime.
- int32_t is_valid_binary(__tgt_device_image *Image, bool Initialized = true);
+ /// Returns non-zero if the \p Image is compatible with the plugin. This
+ /// function does not require the plugin to be initialized before use.
+ int32_t is_plugin_compatible(__tgt_device_image *Image);
+
+ /// Returns non-zero if the \p Image is compatible with the device.
+ int32_t is_device_compatible(int32_t DeviceId, __tgt_device_image *Image);
+
+ /// Returns non-zero if the plugin device has been initialized.
+ int32_t is_device_initialized(int32_t DeviceId) const;
/// Initialize the device inside of the plugin.
int32_t init_device(int32_t DeviceId);
@@ -1180,7 +1188,7 @@ public:
const char **ErrStr);
/// Sets the offset into the devices for use by OMPT.
- int32_t set_device_offset(int32_t DeviceIdOffset);
+ int32_t set_device_identifier(int32_t UserId, int32_t DeviceId);
/// Returns if the plugin can support auotmatic copy.
int32_t use_auto_zero_copy(int32_t DeviceId);
@@ -1200,10 +1208,8 @@ private:
/// Number of devices available for the plugin.
int32_t NumDevices = 0;
- /// Index offset, which when added to a DeviceId, will yield a unique
- /// user-observable device identifier. This is especially important when
- /// DeviceIds of multiple plugins / RTLs need to be distinguishable.
- int32_t DeviceIdStartIndex = 0;
+ /// Map of plugin device identifiers to the user device identifier.
+ llvm::DenseMap<int32_t, int32_t> UserDeviceIds;
/// Array of pointers to the devices. Initially, they are all set to nullptr.
/// Once a device is initialized, the pointer is stored in the position given
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 913721a..5a53c47 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -748,8 +748,7 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
if (ompt::Initialized) {
bool ExpectedStatus = false;
if (OmptInitialized.compare_exchange_strong(ExpectedStatus, true))
- performOmptCallback(device_initialize, /*device_num=*/DeviceId +
- Plugin.getDeviceIdStartIndex(),
+ performOmptCallback(device_initialize, Plugin.getUserId(DeviceId),
/*type=*/getComputeUnitKind().c_str(),
/*device=*/reinterpret_cast<ompt_device_t *>(this),
/*lookup=*/ompt::lookupCallbackByName,
@@ -847,9 +846,7 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
if (ompt::Initialized) {
bool ExpectedStatus = true;
if (OmptInitialized.compare_exchange_strong(ExpectedStatus, false))
- performOmptCallback(device_finalize,
- /*device_num=*/DeviceId +
- Plugin.getDeviceIdStartIndex());
+ performOmptCallback(device_finalize, Plugin.getUserId(DeviceId));
}
#endif
@@ -908,7 +905,7 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
size_t Bytes =
getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart);
performOmptCallback(
- device_load, /*device_num=*/DeviceId + Plugin.getDeviceIdStartIndex(),
+ device_load, Plugin.getUserId(DeviceId),
/*FileName=*/nullptr, /*FileOffset=*/0, /*VmaInFile=*/nullptr,
/*ImgSize=*/Bytes, /*HostAddr=*/InputTgtImage->ImageStart,
/*DeviceAddr=*/nullptr, /* FIXME: ModuleId */ 0);
@@ -1492,11 +1489,14 @@ Error GenericDeviceTy::syncEvent(void *EventPtr) {
bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); }
Error GenericPluginTy::init() {
+ if (Initialized)
+ return Plugin::success();
+
auto NumDevicesOrErr = initImpl();
if (!NumDevicesOrErr)
return NumDevicesOrErr.takeError();
-
Initialized = true;
+
NumDevices = *NumDevicesOrErr;
if (NumDevices == 0)
return Plugin::success();
@@ -1517,6 +1517,8 @@ Error GenericPluginTy::init() {
}
Error GenericPluginTy::deinit() {
+ assert(Initialized && "Plugin was not initialized!");
+
// Deinitialize all active devices.
for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) {
if (Devices[DeviceId]) {
@@ -1537,7 +1539,11 @@ Error GenericPluginTy::deinit() {
delete RecordReplay;
// Perform last deinitializations on the plugin.
- return deinitImpl();
+ if (Error Err = deinitImpl())
+ return Err;
+ Initialized = false;
+
+ return Plugin::success();
}
Error GenericPluginTy::initDevice(int32_t DeviceId) {
@@ -1599,8 +1605,7 @@ Expected<bool> GenericPluginTy::checkBitcodeImage(StringRef Image) const {
int32_t GenericPluginTy::is_initialized() const { return Initialized; }
-int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image,
- bool Initialized) {
+int32_t GenericPluginTy::is_plugin_compatible(__tgt_device_image *Image) {
StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
target::getPtrDiff(Image->ImageEnd, Image->ImageStart));
@@ -1618,11 +1623,43 @@ int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image,
auto MatchOrErr = checkELFImage(Buffer);
if (Error Err = MatchOrErr.takeError())
return HandleError(std::move(Err));
- if (!Initialized || !*MatchOrErr)
- return *MatchOrErr;
+ return *MatchOrErr;
+ }
+ case file_magic::bitcode: {
+ auto MatchOrErr = checkBitcodeImage(Buffer);
+ if (Error Err = MatchOrErr.takeError())
+ return HandleError(std::move(Err));
+ return *MatchOrErr;
+ }
+ default:
+ return false;
+ }
+}
+
+int32_t GenericPluginTy::is_device_compatible(int32_t DeviceId,
+ __tgt_device_image *Image) {
+ StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
+ target::getPtrDiff(Image->ImageEnd, Image->ImageStart));
+
+ auto HandleError = [&](Error Err) -> bool {
+ [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
+ DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str());
+ return false;
+ };
+ switch (identify_magic(Buffer)) {
+ case file_magic::elf:
+ case file_magic::elf_relocatable:
+ case file_magic::elf_executable:
+ case file_magic::elf_shared_object:
+ case file_magic::elf_core: {
+ auto MatchOrErr = checkELFImage(Buffer);
+ if (Error Err = MatchOrErr.takeError())
+ return HandleError(std::move(Err));
+ if (!*MatchOrErr)
+ return false;
// Perform plugin-dependent checks for the specific architecture if needed.
- auto CompatibleOrErr = isELFCompatible(Buffer);
+ auto CompatibleOrErr = isELFCompatible(DeviceId, Buffer);
if (Error Err = CompatibleOrErr.takeError())
return HandleError(std::move(Err));
return *CompatibleOrErr;
@@ -1638,6 +1675,10 @@ int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image,
}
}
+int32_t GenericPluginTy::is_device_initialized(int32_t DeviceId) const {
+ return isValidDeviceId(DeviceId) && Devices[DeviceId] != nullptr;
+}
+
int32_t GenericPluginTy::init_device(int32_t DeviceId) {
auto Err = initDevice(DeviceId);
if (Err) {
@@ -1985,8 +2026,9 @@ int32_t GenericPluginTy::init_device_info(int32_t DeviceId,
return OFFLOAD_SUCCESS;
}
-int32_t GenericPluginTy::set_device_offset(int32_t DeviceIdOffset) {
- setDeviceIdStartIndex(DeviceIdOffset);
+int32_t GenericPluginTy::set_device_identifier(int32_t UserId,
+ int32_t DeviceId) {
+ UserDeviceIds[DeviceId] = UserId;
return OFFLOAD_SUCCESS;
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b260334..62460c0 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1388,8 +1388,9 @@ struct CUDAPluginTy final : public GenericPluginTy {
const char *getName() const override { return GETNAME(TARGET_NAME); }
- /// Check whether the image is compatible with the available CUDA devices.
- Expected<bool> isELFCompatible(StringRef Image) const override {
+ /// Check whether the image is compatible with a CUDA device.
+ Expected<bool> isELFCompatible(uint32_t DeviceId,
+ StringRef Image) const override {
auto ElfOrErr =
ELF64LEObjectFile::create(MemoryBufferRef(Image, /*Identifier=*/""),
/*InitContent=*/false);
@@ -1399,33 +1400,29 @@ struct CUDAPluginTy final : public GenericPluginTy {
// Get the numeric value for the image's `sm_` value.
auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
- for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
- CUdevice Device;
- CUresult Res = cuDeviceGet(&Device, DevId);
- if (auto Err = Plugin::check(Res, "Error in cuDeviceGet: %s"))
- return std::move(Err);
-
- int32_t Major, Minor;
- Res = cuDeviceGetAttribute(
- &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, Device);
- if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
- return std::move(Err);
-
- Res = cuDeviceGetAttribute(
- &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, Device);
- if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
- return std::move(Err);
-
- int32_t ImageMajor = SM / 10;
- int32_t ImageMinor = SM % 10;
-
- // A cubin generated for a certain compute capability is supported to
- // run on any GPU with the same major revision and same or higher minor
- // revision.
- if (Major != ImageMajor || Minor < ImageMinor)
- return false;
- }
- return true;
+ CUdevice Device;
+ CUresult Res = cuDeviceGet(&Device, DeviceId);
+ if (auto Err = Plugin::check(Res, "Error in cuDeviceGet: %s"))
+ return std::move(Err);
+
+ int32_t Major, Minor;
+ Res = cuDeviceGetAttribute(
+ &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, Device);
+ if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
+ return std::move(Err);
+
+ Res = cuDeviceGetAttribute(
+ &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, Device);
+ if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
+ return std::move(Err);
+
+ int32_t ImageMajor = SM / 10;
+ int32_t ImageMinor = SM % 10;
+
+ // A cubin generated for a certain compute capability is supported to
+ // run on any GPU with the same major revision and same or higher minor
+ // revision.
+ return Major == ImageMajor && Minor >= ImageMinor;
}
};
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index ef84cba..aa59ea6 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -418,7 +418,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
}
/// All images (ELF-compatible) should be compatible with this plugin.
- Expected<bool> isELFCompatible(StringRef) const override { return true; }
+ Expected<bool> isELFCompatible(uint32_t, StringRef) const override {
+ return true;
+ }
Triple::ArchType getTripleArch() const override {
#if defined(__x86_64__)
diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp
index 13f08b1..5e8f917 100644
--- a/offload/src/PluginManager.cpp
+++ b/offload/src/PluginManager.cpp
@@ -47,6 +47,9 @@ void PluginManager::deinit() {
DP("Unloading RTLs...\n");
for (auto &Plugin : Plugins) {
+ if (!Plugin->is_initialized())
+ continue;
+
if (auto Err = Plugin->deinit()) {
[[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
DP("Failed to deinit plugin: %s\n", InfoMsg.c_str());
@@ -57,90 +60,15 @@ void PluginManager::deinit() {
DP("RTLs unloaded!\n");
}
-void PluginManager::initDevices(GenericPluginTy &RTL) {
- // If this RTL has already been initialized.
- if (PM->DeviceOffsets.contains(&RTL))
- return;
- TIMESCOPE();
-
- // If this RTL is not already in use, initialize it.
- assert(RTL.number_of_devices() > 0 && "Tried to initialize useless plugin!");
-
- // Initialize the device information for the RTL we are about to use.
- auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
-
- // Initialize the index of this RTL and save it in the used RTLs.
- int32_t DeviceOffset = ExclusiveDevicesAccessor->size();
-
- // Set the device identifier offset in the plugin.
- RTL.set_device_offset(DeviceOffset);
-
- int32_t NumberOfUserDevices = 0;
- int32_t NumPD = RTL.number_of_devices();
- ExclusiveDevicesAccessor->reserve(DeviceOffset + NumPD);
- // Auto zero-copy is a per-device property. We need to ensure
- // that all devices are suggesting to use it.
- bool UseAutoZeroCopy = !(NumPD == 0);
- for (int32_t PDevI = 0, UserDevId = DeviceOffset; PDevI < NumPD; PDevI++) {
- auto Device = std::make_unique<DeviceTy>(&RTL, UserDevId, PDevI);
- if (auto Err = Device->init()) {
- DP("Skip plugin known device %d: %s\n", PDevI,
- toString(std::move(Err)).c_str());
- continue;
- }
- UseAutoZeroCopy = UseAutoZeroCopy && Device->useAutoZeroCopy();
-
- ExclusiveDevicesAccessor->push_back(std::move(Device));
- ++NumberOfUserDevices;
- ++UserDevId;
- }
-
- // Auto Zero-Copy can only be currently triggered when the system is an
- // homogeneous APU architecture without attached discrete GPUs.
- // If all devices suggest to use it, change requirment flags to trigger
- // zero-copy behavior when mapping memory.
- if (UseAutoZeroCopy)
- addRequirements(OMPX_REQ_AUTO_ZERO_COPY);
-
- DeviceOffsets[&RTL] = DeviceOffset;
- DeviceUsed[&RTL] = NumberOfUserDevices;
- DP("Plugin has index %d, exposes %d out of %d devices!\n", DeviceOffset,
- NumberOfUserDevices, RTL.number_of_devices());
-}
-
void PluginManager::initAllPlugins() {
- for (auto &R : Plugins)
- initDevices(*R);
-}
-
-static void registerImageIntoTranslationTable(TranslationTable &TT,
- int32_t DeviceOffset,
- int32_t NumberOfUserDevices,
- __tgt_device_image *Image) {
-
- // same size, as when we increase one, we also increase the other.
- assert(TT.TargetsTable.size() == TT.TargetsImages.size() &&
- "We should have as many images as we have tables!");
-
- // Resize the Targets Table and Images to accommodate the new targets if
- // required
- unsigned TargetsTableMinimumSize = DeviceOffset + NumberOfUserDevices;
-
- if (TT.TargetsTable.size() < TargetsTableMinimumSize) {
- TT.DeviceTables.resize(TargetsTableMinimumSize, {});
- TT.TargetsImages.resize(TargetsTableMinimumSize, 0);
- TT.TargetsEntries.resize(TargetsTableMinimumSize, {});
- TT.TargetsTable.resize(TargetsTableMinimumSize, 0);
- }
-
- // Register the image in all devices for this target type.
- for (int32_t I = 0; I < NumberOfUserDevices; ++I) {
- // If we are changing the image we are also invalidating the target table.
- if (TT.TargetsImages[DeviceOffset + I] != Image) {
- TT.TargetsImages[DeviceOffset + I] = Image;
- TT.TargetsTable[DeviceOffset + I] =
- 0; // lazy initialization of target table.
+ for (auto &R : plugins()) {
+ if (auto Err = R.init()) {
+ [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ DP("Failed to init plugin: %s\n", InfoMsg.c_str());
+ continue;
}
+ DP("Registered plugin %s with %d visible device(s)\n", R.getName(),
+ R.number_of_devices());
}
}
@@ -153,27 +81,6 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
if (Entry.flags == OMP_REGISTER_REQUIRES)
PM->addRequirements(Entry.data);
- // Initialize all the plugins that have associated images.
- for (auto &Plugin : Plugins) {
- // Extract the exectuable image and extra information if availible.
- for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) {
- if (Plugin->is_initialized())
- continue;
-
- if (!Plugin->is_valid_binary(&Desc->DeviceImages[i],
- /*Initialized=*/false))
- continue;
-
- if (auto Err = Plugin->init()) {
- [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
- DP("Failed to init plugin: %s\n", InfoMsg.c_str());
- } else {
- DP("Registered plugin %s with %d visible device(s)\n",
- Plugin->getName(), Plugin->number_of_devices());
- }
- }
- }
-
// Extract the exectuable image and extra information if availible.
for (int32_t i = 0; i < Desc->NumDeviceImages; ++i)
PM->addDeviceImage(*Desc, Desc->DeviceImages[i]);
@@ -188,54 +95,110 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
// Scan the RTLs that have associated images until we find one that supports
// the current image.
for (auto &R : PM->plugins()) {
- if (!R.number_of_devices())
+ if (!R.is_plugin_compatible(Img))
continue;
- if (!R.is_valid_binary(Img, /*Initialized=*/true)) {
- DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
- DPxPTR(Img->ImageStart), R.getName());
- continue;
+ if (!R.is_initialized()) {
+ if (auto Err = R.init()) {
+ [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ DP("Failed to init plugin: %s\n", InfoMsg.c_str());
+ continue;
+ }
+ DP("Registered plugin %s with %d visible device(s)\n", R.getName(),
+ R.number_of_devices());
}
- DP("Image " DPxMOD " is compatible with RTL %s!\n",
- DPxPTR(Img->ImageStart), R.getName());
-
- PM->initDevices(R);
+ if (!R.number_of_devices()) {
+ DP("Skipping plugin %s with no visible devices\n", R.getName());
+ continue;
+ }
- // Initialize (if necessary) translation table for this library.
- PM->TrlTblMtx.lock();
- if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) {
- PM->HostEntriesBeginRegistrationOrder.push_back(Desc->HostEntriesBegin);
- TranslationTable &TransTable =
+ for (int32_t DeviceId = 0; DeviceId < R.number_of_devices(); ++DeviceId) {
+ if (!R.is_device_compatible(DeviceId, Img))
+ continue;
+
+ DP("Image " DPxMOD " is compatible with RTL %s device %d!\n",
+ DPxPTR(Img->ImageStart), R.getName(), DeviceId);
+
+ if (!R.is_device_initialized(DeviceId)) {
+ // Initialize the device information for the RTL we are about to use.
+ auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
+
+ int32_t UserId = ExclusiveDevicesAccessor->size();
+
+ // Set the device identifier offset in the plugin.
+        R.set_device_identifier(UserId, DeviceId);
+
+ auto Device = std::make_unique<DeviceTy>(&R, UserId, DeviceId);
+ if (auto Err = Device->init()) {
+ [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ DP("Failed to init device %d: %s\n", DeviceId, InfoMsg.c_str());
+ continue;
+ }
+
+ ExclusiveDevicesAccessor->push_back(std::move(Device));
+
+ // We need to map between the plugin's device identifier and the one
+ // that OpenMP will use.
+ PM->DeviceIds[std::make_pair(&R, DeviceId)] = UserId;
+ }
+
+ // Initialize (if necessary) translation table for this library.
+ PM->TrlTblMtx.lock();
+ if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) {
+ PM->HostEntriesBeginRegistrationOrder.push_back(
+ Desc->HostEntriesBegin);
+ TranslationTable &TT =
+ (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
+ TT.HostTable.EntriesBegin = Desc->HostEntriesBegin;
+ TT.HostTable.EntriesEnd = Desc->HostEntriesEnd;
+ }
+
+ // Retrieve translation table for this library.
+ TranslationTable &TT =
(PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
- TransTable.HostTable.EntriesBegin = Desc->HostEntriesBegin;
- TransTable.HostTable.EntriesEnd = Desc->HostEntriesEnd;
- }
- // Retrieve translation table for this library.
- TranslationTable &TransTable =
- (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
+ DP("Registering image " DPxMOD " with RTL %s!\n",
+ DPxPTR(Img->ImageStart), R.getName());
- DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(Img->ImageStart),
- R.getName());
+ auto UserId = PM->DeviceIds[std::make_pair(&R, DeviceId)];
+ if (TT.TargetsTable.size() < static_cast<size_t>(UserId + 1)) {
+ TT.DeviceTables.resize(UserId + 1, {});
+ TT.TargetsImages.resize(UserId + 1, nullptr);
+ TT.TargetsEntries.resize(UserId + 1, {});
+ TT.TargetsTable.resize(UserId + 1, nullptr);
+ }
- registerImageIntoTranslationTable(TransTable, PM->DeviceOffsets[&R],
- PM->DeviceUsed[&R], Img);
- PM->UsedImages.insert(Img);
+ // Register the image for this target type and invalidate the table.
+ TT.TargetsImages[UserId] = Img;
+ TT.TargetsTable[UserId] = nullptr;
- PM->TrlTblMtx.unlock();
- FoundRTL = &R;
+ PM->UsedImages.insert(Img);
+ FoundRTL = &R;
- // if an RTL was found we are done - proceed to register the next image
- break;
+ PM->TrlTblMtx.unlock();
+ }
}
-
- if (!FoundRTL) {
+ if (!FoundRTL)
DP("No RTL found for image " DPxMOD "!\n", DPxPTR(Img->ImageStart));
- }
}
PM->RTLsMtx.unlock();
+ bool UseAutoZeroCopy = Plugins.size() > 0;
+
+ auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
+ for (const auto &Device : *ExclusiveDevicesAccessor)
+ UseAutoZeroCopy &= Device->useAutoZeroCopy();
+
+  // Auto Zero-Copy can only be currently triggered when the system is a
+  // homogeneous APU architecture without attached discrete GPUs.
+  // If all devices suggest to use it, change requirement flags to trigger
+  // zero-copy behavior when mapping memory.
+ if (UseAutoZeroCopy)
+ addRequirements(OMPX_REQ_AUTO_ZERO_COPY);
+
DP("Done registering entries!\n");
}
@@ -257,7 +220,7 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
// Scan the RTLs that have associated images until we find one that supports
// the current image. We only need to scan RTLs that are already being used.
for (auto &R : PM->plugins()) {
- if (!DeviceOffsets.contains(&R))
+    if (!R.is_initialized())
continue;
// Ensure that we do not use any unused images associated with this RTL.
diff --git a/offload/src/omptarget.cpp b/offload/src/omptarget.cpp
index 91e1213..9bca852 100644
--- a/offload/src/omptarget.cpp
+++ b/offload/src/omptarget.cpp
@@ -315,7 +315,7 @@ void handleTargetOutcome(bool Success, ident_t *Loc) {
FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html "
"for debugging options.\n");
- if (!PM->getNumUsedPlugins()) {
+ if (!PM->getNumActivePlugins()) {
FAILURE_MESSAGE(
"No images found compatible with the installed hardware. ");
diff --git a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp
index d2569a5..c924689 100644
--- a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp
+++ b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp
@@ -23,7 +23,7 @@ bool equal(T LHS, T RHS) {
template <typename T,
std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
bool equal(T LHS, T RHS) {
- return std::abs(LHS - RHS) < std::numeric_limits<T>::epsilon();
+ return __builtin_fabs(LHS - RHS) < std::numeric_limits<T>::epsilon();
}
template <typename T> void test() {
diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index c228a39..e565354 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -137,8 +137,10 @@ if (OPENMP_ENABLE_OMPT_TOOLS)
endif()
# Propagate OMPT support to offload
-set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
-set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_OMP_TOOLS_INCLUDE_DIR} PARENT_SCOPE)
+if(NOT ${OPENMP_STANDALONE_BUILD})
+ set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
+ set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_OMP_TOOLS_INCLUDE_DIR} PARENT_SCOPE)
+endif()
option(OPENMP_MSVC_NAME_SCHEME "Build dll with MSVC naming scheme." OFF)
diff --git a/third-party/unittest/googletest/include/gtest/internal/gtest-port.h b/third-party/unittest/googletest/include/gtest/internal/gtest-port.h
index a17349e..02e1eb0 100644
--- a/third-party/unittest/googletest/include/gtest/internal/gtest-port.h
+++ b/third-party/unittest/googletest/include/gtest/internal/gtest-port.h
@@ -652,7 +652,7 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
// Determines whether to support death tests.
// pops up a dialog window that cannot be suppressed programmatically.
#if (defined(GTEST_OS_LINUX) || defined(GTEST_OS_CYGWIN) || \
- defined(GTEST_OS_SOLARIS) || \
+ defined(GTEST_OS_SOLARIS) || defined(GTEST_OS_ZOS) || \
(defined(GTEST_OS_MAC) && !defined(GTEST_OS_IOS)) || \
(defined(GTEST_OS_WINDOWS_DESKTOP) && _MSC_VER) || \
defined(GTEST_OS_WINDOWS_MINGW) || defined(GTEST_OS_AIX) || \
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index f3809bd..96cc895 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1111,7 +1111,7 @@ libc_support_library(
],
defines = [
"LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY",
- "LIBC_COPT_RAW_MUTEX_DEFAULT_SPIN_COUNT"
+ "LIBC_COPT_RAW_MUTEX_DEFAULT_SPIN_COUNT",
],
target_compatible_with = select({
"@platforms//os:linux": [],
@@ -1119,9 +1119,9 @@ libc_support_library(
}),
deps = [
":__support_cpp_optional",
- ":__support_time_linux",
":__support_threads_linux_futex_utils",
":__support_threads_sleep",
+ ":__support_time_linux",
":types_pid_t",
],
)
@@ -3580,6 +3580,7 @@ libc_function(
}),
weak = True,
deps = [
+ ":__support_macros_sanitizer",
":__support_osutil_syscall",
":errno",
":hdr_signal_macros",
@@ -3599,6 +3600,7 @@ libc_function(
}),
weak = True,
deps = [
+ ":__support_macros_sanitizer",
":__support_osutil_syscall",
":errno",
":hdr_signal_macros",
@@ -3620,6 +3622,7 @@ libc_function(
# }),
# weak = True,
# deps = [
+# ":__support_macros_sanitizer",
# ":__support_osutil_syscall",
# ":errno",
# ":hdr_signal_macros",
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
index 3980ef6..c8001fe 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
@@ -99,6 +99,7 @@ libc_test(
name = "fixedvector_test",
srcs = ["fixedvector_test.cpp"],
deps = [
+ "//libc:__support_cpp_array",
"//libc:__support_fixedvector",
],
)
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel
index 4f72a0a..fac692a 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel
@@ -529,7 +529,9 @@ libc_support_library(
"//libc:__support_cpp_bit",
"//libc:__support_cpp_type_traits",
"//libc:__support_fputil_basic_operations",
+ "//libc:__support_fputil_fenv_impl",
"//libc:__support_fputil_fp_bits",
+ "//libc:hdr_fenv_macros",
"//libc:hdr_math_macros",
"//libc/test/UnitTest:LibcUnitTest",
"//libc/test/UnitTest:fp_test_helpers",
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl b/utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl
index 11d2e5a..9ec3a5e 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl
@@ -25,6 +25,7 @@ def math_test(name, hdrs = [], deps = [], **kwargs):
srcs = [test_name + ".cpp"] + hdrs,
libc_function_deps = ["//libc:func_name".replace("func_name", name)],
deps = [
+ "//libc/test/UnitTest:fp_test_helpers",
"//libc:__support_cpp_algorithm",
"//libc:__support_fputil_basic_operations",
"//libc:__support_fputil_fenv_impl",
@@ -32,9 +33,9 @@ def math_test(name, hdrs = [], deps = [], **kwargs):
"//libc:__support_fputil_manipulation_functions",
"//libc:__support_fputil_nearest_integer_operations",
"//libc:__support_fputil_normal_float",
+ "//libc:__support_macros_properties_architectures",
"//libc:__support_math_extras",
"//libc:__support_uint128",
- "//libc/test/UnitTest:fp_test_helpers",
"//libc:hdr_math_macros",
] + deps,
**kwargs
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel
index 7d4b9978..2ad2209 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel
@@ -117,6 +117,7 @@ math_test(
name = "llrintf128",
hdrs = ["RoundToIntegerTest.h"],
)
+
math_test(
name = "lroundf128",
hdrs = ["RoundToIntegerTest.h"],
@@ -135,7 +136,9 @@ libc_support_library(
"//libc:__support_cpp_bit",
"//libc:__support_cpp_type_traits",
"//libc:__support_fputil_basic_operations",
+ "//libc:__support_fputil_fenv_impl",
"//libc:__support_fputil_fp_bits",
+ "//libc:hdr_fenv_macros",
"//libc:hdr_math_macros",
"//libc/test/UnitTest:LibcUnitTest",
"//libc/test/UnitTest:fp_test_helpers",
diff --git a/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel
index 53a8c9b..6dd1fc4 100644
--- a/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel
@@ -46,6 +46,7 @@ libc_support_library(
"//libc:__support_cpp_type_traits",
"//libc:__support_fputil_fp_bits",
"//libc:__support_fputil_fpbits_str",
+ "//libc:__support_macros_properties_types",
"//libc:hdr_math_macros",
"//libc/test/UnitTest:LibcUnitTest",
"//libc/test/UnitTest:fp_test_helpers",
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index a67f205..aebb05d 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -164,6 +164,10 @@ td_library(
includes = ["include"],
)
+llvm_config_target_defines = [
+ "LLVM_HAS_{}_TARGET=1".format(t) for t in llvm_targets
+]
+
cc_library(
name = "config",
hdrs = [
@@ -171,7 +175,7 @@ cc_library(
"include/llvm/Config/llvm-config.h",
],
copts = llvm_copts,
- defines = llvm_config_defines,
+ defines = llvm_config_defines + llvm_config_target_defines,
includes = ["include"],
textual_hdrs = [
"include/llvm/Config/AsmParsers.def",
@@ -287,6 +291,7 @@ cc_library(
linkopts = select({
"@platforms//os:windows": [
"ws2_32.lib",
+ "ntdll.lib",
],
"@platforms//os:freebsd": [
"-pthread",
@@ -1754,6 +1759,7 @@ cc_library(
":TransformUtils",
":Vectorize",
":config",
+ ":ir_headers",
],
)