aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/docs/NVPTXUsage.rst444
-rw-r--r--llvm/include/llvm/IR/IntrinsicsNVVM.td69
-rw-r--r--llvm/include/llvm/Support/SpecialCaseList.h4
-rw-r--r--llvm/include/llvm/TargetParser/ARMTargetParser.def3
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp28
-rw-r--r--llvm/lib/Support/SpecialCaseList.cpp32
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td10
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h5
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp3
-rw-r--r--llvm/lib/Target/ARM/ARMProcessors.td11
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp43
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td155
-rw-r--r--llvm/lib/Target/PowerPC/P10InstrResources.td13
-rw-r--r--llvm/lib/Target/PowerPC/P9InstrResources.td2
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td1
-rw-r--r--llvm/lib/Target/PowerPC/PPCBack2BackFusion.def8
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrFormats.td20
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td23
-rw-r--r--llvm/lib/Target/X86/X86.h15
-rw-r--r--llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp1
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXType.cpp96
-rw-r--r--llvm/lib/Target/X86/X86PassRegistry.def7
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp2
-rw-r--r--llvm/lib/TargetParser/Host.cpp1
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp20
-rw-r--r--llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp6
-rw-r--r--llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp4
-rw-r--r--llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp2
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp4
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp2
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp4
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp2
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp21
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp4
-rw-r--r--llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp2
-rw-r--r--llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp27
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vhadd.ll82
-rw-r--r--llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll50
-rw-r--r--llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll13
-rw-r--r--llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll12
-rw-r--r--llvm/test/CodeGen/NVPTX/mbarrier_arr.ll165
-rw-r--r--llvm/test/CodeGen/NVPTX/mbarrier_arr_relaxed.ll165
-rw-r--r--llvm/test/CodeGen/NVPTX/mbarrier_tx.ll87
-rw-r--r--llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx70.ll35
-rw-r--r--llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx71.ll36
-rw-r--r--llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx78.ll83
-rw-r--r--llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx80.ll123
-rw-r--r--llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx86.ll148
-rw-r--r--llvm/test/CodeGen/PowerPC/p10-spill-crun.ll2
-rw-r--r--llvm/test/CodeGen/PowerPC/vector-reduce-add.ll22
-rw-r--r--llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll3
-rw-r--r--llvm/test/CodeGen/X86/AMX/amx-combine.ll3
-rw-r--r--llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll3
-rw-r--r--llvm/test/CodeGen/X86/AMX/amx-type.ll3
-rw-r--r--llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll3
-rw-r--r--llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll3
-rw-r--r--llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll3
-rw-r--r--llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll3
-rw-r--r--llvm/test/CodeGen/X86/trunc-srl-load.ll1672
-rw-r--r--llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll2
-rw-r--r--llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll2
-rw-r--r--llvm/test/DebugInfo/BPF/extern-void.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/array-odr-violation.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/asan-module-ctor.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/asm.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/class-options-common.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/comdat.ll10
-rw-r--r--llvm/test/DebugInfo/COFF/cpp-mangling.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/defer-complete-type.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/enum-co.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/fpo-argsize.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/fpo-csrs.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/fpo-funclet.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/fpo-stack-protect.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/frameproc-flags.ll14
-rw-r--r--llvm/test/DebugInfo/COFF/function-options.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/global-constants.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/global_visibility.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/globals.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/inheritance.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/inlining-files.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/inlining-header.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/inlining-levels.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/inlining-padding.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/inlining.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/lambda.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/lexicalblock.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/lines-difile.ll6
-rw-r--r--llvm/test/DebugInfo/COFF/local-constant.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/local-variable-gap.ll6
-rw-r--r--llvm/test/DebugInfo/COFF/local-variables.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/long-name.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/multifile.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/multifunction.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/nrvo.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/parameter-order.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/parent-type-scopes.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/pieces.ll8
-rw-r--r--llvm/test/DebugInfo/COFF/purge-typedef-udts.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/register-variables.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/retained-types.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/scopes.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/simple.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/static-methods.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/thunk.ll10
-rw-r--r--llvm/test/DebugInfo/COFF/type-quals.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/types-array.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/types-basic.ll6
-rw-r--r--llvm/test/DebugInfo/COFF/types-calling-conv.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/types-cvarargs.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/types-data-members.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/types-recursive-struct.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/udts.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/unnamed.ll2
-rw-r--r--llvm/test/DebugInfo/COFF/vframe-csr.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/vframe-fpo.ll4
-rw-r--r--llvm/test/DebugInfo/COFF/vftables.ll8
-rw-r--r--llvm/test/DebugInfo/COFF/virtual-method-kinds.ll8
-rw-r--r--llvm/test/DebugInfo/COFF/virtual-methods.ll8
-rw-r--r--llvm/test/DebugInfo/COFF/vtable-optzn-array.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/PR20038.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/block-asan.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/constant-pointers.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/cross-cu-inlining.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/cu-range-hole.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/cu-ranges.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/dead-argument-order.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/debug-info-always-inline.ll6
-rw-r--r--llvm/test/DebugInfo/Generic/def-line.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/directives-only.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/discriminator.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/enum-types.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/enum.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/extended-loc-directive.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/global-sra-array.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/global.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll6
-rw-r--r--llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/inline-no-debug-info.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/inline-scopes.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/inlined-arguments.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/inlined-strings.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/lto-comp-dir.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/mainsubprogram.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/member-order.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/multiline.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/namespace.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/namespace_function_definition.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/recursive_inlining.ll4
-rw-r--r--llvm/test/DebugInfo/Generic/restrict.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/tu-composite.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/unconditional-branch.ll2
-rw-r--r--llvm/test/DebugInfo/Generic/version.ll2
-rw-r--r--llvm/test/DebugInfo/Inputs/gmlt.ll4
-rw-r--r--llvm/test/DebugInfo/Inputs/line.ll2
-rw-r--r--llvm/test/DebugInfo/MSP430/cu-ranges.ll2
-rw-r--r--llvm/test/DebugInfo/Mips/fn-call-line.ll4
-rw-r--r--llvm/test/DebugInfo/NVPTX/cu-range-hole.ll2
-rw-r--r--llvm/test/DebugInfo/NVPTX/debug-info.ll2
-rw-r--r--llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll4
-rw-r--r--llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll2
-rw-r--r--llvm/test/DebugInfo/Sparc/gnu-window-save.ll4
-rw-r--r--llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll2
-rw-r--r--llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll2
-rw-r--r--llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll2
-rw-r--r--llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll2
-rw-r--r--llvm/test/DebugInfo/X86/addr_comments.ll2
-rw-r--r--llvm/test/DebugInfo/X86/arguments.ll2
-rw-r--r--llvm/test/DebugInfo/X86/coff_debug_info_type.ll2
-rw-r--r--llvm/test/DebugInfo/X86/coff_relative_names.ll2
-rw-r--r--llvm/test/DebugInfo/X86/convert-loclist.ll4
-rw-r--r--llvm/test/DebugInfo/X86/cu-ranges-odr.ll2
-rw-r--r--llvm/test/DebugInfo/X86/cu-ranges.ll2
-rw-r--r--llvm/test/DebugInfo/X86/dbg_value_direct.ll4
-rw-r--r--llvm/test/DebugInfo/X86/debug-dead-local-var.ll2
-rw-r--r--llvm/test/DebugInfo/X86/debug-info-blocks.ll2
-rw-r--r--llvm/test/DebugInfo/X86/debug-loc-asan.mir2
-rw-r--r--llvm/test/DebugInfo/X86/debug-loc-offset.mir4
-rw-r--r--llvm/test/DebugInfo/X86/debug-ranges-offset.ll4
-rw-r--r--llvm/test/DebugInfo/X86/decl-derived-member.ll6
-rw-r--r--llvm/test/DebugInfo/X86/discriminator.ll2
-rw-r--r--llvm/test/DebugInfo/X86/discriminator2.ll4
-rw-r--r--llvm/test/DebugInfo/X86/discriminator3.ll4
-rw-r--r--llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll4
-rw-r--r--llvm/test/DebugInfo/X86/dwarf-linkage-names.ll2
-rw-r--r--llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll2
-rw-r--r--llvm/test/DebugInfo/X86/fission-inline.ll4
-rw-r--r--llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll4
-rw-r--r--llvm/test/DebugInfo/X86/fission-no-inlining.ll4
-rw-r--r--llvm/test/DebugInfo/X86/fission-ranges.ll2
-rw-r--r--llvm/test/DebugInfo/X86/generate-odr-hash.ll2
-rw-r--r--llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll2
-rw-r--r--llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll2
-rw-r--r--llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll4
-rw-r--r--llvm/test/DebugInfo/X86/gnu-public-names.ll2
-rw-r--r--llvm/test/DebugInfo/X86/inline-member-function.ll2
-rw-r--r--llvm/test/DebugInfo/X86/inline-seldag-test.ll2
-rw-r--r--llvm/test/DebugInfo/X86/lexical_block.ll2
-rw-r--r--llvm/test/DebugInfo/X86/line-info.ll2
-rw-r--r--llvm/test/DebugInfo/X86/low-pc-cu.ll2
-rw-r--r--llvm/test/DebugInfo/X86/mi-print.ll2
-rw-r--r--llvm/test/DebugInfo/X86/missing-abstract-variable.ll4
-rw-r--r--llvm/test/DebugInfo/X86/no_debug_ranges.ll2
-rw-r--r--llvm/test/DebugInfo/X86/nodebug.ll2
-rw-r--r--llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll4
-rw-r--r--llvm/test/DebugInfo/X86/objc-property-void.ll2
-rw-r--r--llvm/test/DebugInfo/X86/pieces-4.ll4
-rw-r--r--llvm/test/DebugInfo/X86/pr19307.mir4
-rw-r--r--llvm/test/DebugInfo/X86/pr28270.ll4
-rw-r--r--llvm/test/DebugInfo/X86/pr45181.ll6
-rw-r--r--llvm/test/DebugInfo/X86/safestack-byval.ll2
-rw-r--r--llvm/test/DebugInfo/X86/set.ll2
-rw-r--r--llvm/test/DebugInfo/X86/spill-nospill.ll4
-rw-r--r--llvm/test/DebugInfo/X86/sret.ll6
-rw-r--r--llvm/test/DebugInfo/X86/tls.ll2
-rw-r--r--llvm/test/DebugInfo/X86/tu-to-non-named-type.ll2
-rw-r--r--llvm/test/DebugInfo/X86/void-typedef.ll2
-rw-r--r--llvm/test/MC/AMDGPU/literals.s117
-rw-r--r--llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt3
-rw-r--r--llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9.txt (renamed from llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9vector.txt)6
-rw-r--r--llvm/test/Transforms/InstCombine/ptrtoaddr.ll66
-rw-r--r--llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll65
-rw-r--r--llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll94
-rw-r--r--llvm/unittests/TargetParser/TargetParserTest.cpp7
-rwxr-xr-xllvm/utils/update_mc_test_checks.py68
235 files changed, 4249 insertions, 535 deletions
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index e8dceb8..5ad8f9a 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -322,6 +322,450 @@ aligned '``@llvm.nvvm.barrier.cta.*``' instruction should only be used if it is
known that all threads in the CTA evaluate the condition identically, otherwise
behavior is undefined.
+MBarrier family of Intrinsics
+-----------------------------
+
+Overview:
+^^^^^^^^^
+
+An ``mbarrier`` is a barrier created in shared memory that supports:
+
+* Synchronizing any subset of threads within a CTA.
+* One-way synchronization of threads across CTAs of a cluster.
+ Threads can perform only ``arrive`` operations but not ``*_wait`` on an
+ mbarrier located in shared::cluster space.
+* Waiting for completion of asynchronous memory operations initiated by a
+ thread and making them visible to other threads.
+
+Unlike ``bar{.cta}/barrier{.cta}`` instructions which can access a limited
+number of barriers per CTA, ``mbarrier`` objects are user-defined and are
+only limited by the total shared memory size available.
+
+An mbarrier object is an opaque object in shared memory with an
+alignment of 8-bytes. It keeps track of:
+
+* Current phase of the mbarrier object
+* Count of pending arrivals for the current phase of the mbarrier object
+* Count of expected arrivals for the next phase of the mbarrier object
+* Count of pending asynchronous memory operations (or transactions)
+ tracked by the current phase of the mbarrier object. This is also
+ referred to as ``tx-count``. The unit of ``tx-count`` is specified
+ by the asynchronous memory operation (for example,
+ ``llvm.nvvm.cp.async.bulk.tensor.g2s.*``).
+
+The ``phase`` of an mbarrier object is the number of times the mbarrier
+object has been used to synchronize threads/track async operations.
+In each phase, threads perform:
+
+* arrive/expect-tx/complete-tx operations to progress the current phase.
+* test_wait/try_wait operations to check for completion of the current phase.
+
+An mbarrier object completes the current phase when:
+
+* The count of the pending arrivals has reached zero AND
+* The tx-count has reached zero.
+
+When an mbarrier object completes the current phase, below
+actions are performed ``atomically``:
+
+* The mbarrier object transitions to the next phase.
+* The pending arrival count is reinitialized to the expected arrival count.
+
+For more information, refer PTX ISA
+`<https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier>`_.
+
+'``llvm.nvvm.mbarrier.init``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare void @llvm.nvvm.mbarrier.init(ptr %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %addr, i32 %count)
+
+Overview:
+"""""""""
+
+The '``@llvm.nvvm.mbarrier.init.*``' intrinsics are used to initialize
+an mbarrier object located at ``addr`` with the value ``count``.
+``count`` is a 32-bit unsigned integer value and must be within
+the range [1...2^20-1]. During initialization:
+
+* The tx-count and the current phase of the mbarrier object are set to 0.
+* The expected and pending arrival counts are set to ``count``.
+
+Semantics:
+""""""""""
+
+The ``.shared`` variant explicitly uses shared memory address space for
+the ``addr`` operand. If the ``addr`` does not fall within the
+shared::cta space, then the behavior of this intrinsic is undefined.
+Performing ``mbarrier.init`` on a valid mbarrier object is undefined;
+use ``mbarrier.inval`` before reusing the memory for another mbarrier
+or any other purpose.
+
+'``llvm.nvvm.mbarrier.inval``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare void @llvm.nvvm.mbarrier.inval(ptr %addr)
+ declare void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %addr)
+
+Overview:
+"""""""""
+
+The '``@llvm.nvvm.mbarrier.inval.*``' intrinsics invalidate the mbarrier
+object at the address specified by ``addr``.
+
+Semantics:
+""""""""""
+
+The ``.shared`` variant explicitly uses shared memory address space for
+the ``addr`` operand. If the ``addr`` does not fall within the
+shared::cta space, then the behavior of this intrinsic is undefined.
+It is expected that ``addr`` was previously initialized using
+``mbarrier.init``; otherwise, the behavior is undefined.
+
+'``llvm.nvvm.mbarrier.expect.tx``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+
+Overview:
+"""""""""
+
+The '``@llvm.nvvm.mbarrier.expect.tx.*``' intrinsics increase the transaction
+count of the mbarrier object at ``%addr`` by ``%tx_count``. The ``%tx_count``
+is a 32-bit unsigned integer value.
+
+Semantics:
+""""""""""
+
+The ``.space.{cta/cluster}`` indicates the address space where the mbarrier
+object resides.
+
+The ``.scope.{cta/cluster}`` denotes the set of threads that can directly
+observe the synchronizing effect of the mbarrier operation. When scope is
+"cta", all threads executing in the same CTA (as the current thread) can
+directly observe the effect of the ``expect.tx`` operation. Similarly,
+when scope is "cluster", all threads executing in the same Cluster
+(as the current thread) can directly observe the effect of the operation.
+
+If the ``addr`` does not fall within shared::cta or shared::cluster space,
+then the behavior of this intrinsic is undefined. This intrinsic has
+``relaxed`` semantics and hence does not provide any memory ordering
+or visibility guarantees.
+
+'``llvm.nvvm.mbarrier.complete.tx``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+
+Overview:
+"""""""""
+
+The '``@llvm.nvvm.mbarrier.complete.tx.*``' intrinsics decrease the transaction
+count of the mbarrier object at ``%addr`` by ``%tx_count``. The ``%tx_count``
+is a 32-bit unsigned integer value. As a result of this decrement,
+the mbarrier can potentially complete its current phase and transition
+to the next phase.
+
+Semantics:
+""""""""""
+
+The semantics of these intrinsics are identical to those of the
+``llvm.nvvm.mbarrier.expect.tx.*`` intrinsics described above.
+
+'``llvm.nvvm.mbarrier.arrive``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %count)
+ declare i64 @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.arrive.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %count)
+
+ declare i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %count)
+ declare i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %count)
+
+Overview:
+"""""""""
+
+The ``@llvm.nvvm.mbarrier.arrive.*`` intrinsics signal the arrival of the
+executing thread or completion of an asynchronous instruction associated with
+an arrive operation on the mbarrier object at ``%addr``. This operation
+decrements the pending arrival count by ``%count``, a 32-bit unsigned integer,
+potentially completing the current phase and triggering a transition to the
+next phase.
+
+Semantics:
+""""""""""
+
+The ``.space.{cta/cluster}`` indicates the address space where the mbarrier
+object resides. When the mbarrier is in shared::cta space, the intrinsics
+return an opaque 64-bit value capturing the phase of the mbarrier object
+_prior_ to this arrive operation. This value can be used with a try_wait
+or test_wait operation to check for the completion of the mbarrier.
+
+The ``.scope.{cta/cluster}`` denotes the set of threads that can directly
+observe the synchronizing effect of the mbarrier operation. When scope is
+"cta", all threads executing in the same CTA (as the current thread) can
+directly observe the effect of the ``arrive`` operation. Similarly,
+when scope is "cluster", all threads executing in the same Cluster
+(as the current thread) can directly observe the effect of the operation.
+
+If the ``addr`` does not fall within shared::cta or shared::cluster space,
+then the behavior of this intrinsic is undefined.
+
+These intrinsics have ``release`` semantics by default. The release semantics
+ensure ordering of operations that occur in program order _before_ this arrive
+instruction, making their effects visible to subsequent operations in other
+threads of the CTA (or cluster, depending on scope). Threads performing
+corresponding acquire operations (such as mbarrier.test.wait) synchronize
+with this release. The ``relaxed`` variants of these intrinsics do not
+provide any memory ordering or visibility guarantees.
+
+'``llvm.nvvm.mbarrier.arrive.expect.tx``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+
+ declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+
+Overview:
+"""""""""
+
+The ``@llvm.nvvm.mbarrier.arrive.expect.tx.*`` intrinsics are similar to
+the ``@llvm.nvvm.mbarrier.arrive`` intrinsics except that they also
+perform an ``expect-tx`` operation _prior_ to the ``arrive`` operation.
+The ``%tx_count`` specifies the transaction count for the ``expect-tx``
+operation and the count for the ``arrive`` operation is assumed to be 1.
+
+Semantics:
+""""""""""
+
+The semantics of these intrinsics are identical to those of the
+``llvm.nvvm.mbarrier.arrive.*`` intrinsics described above.
+
+'``llvm.nvvm.mbarrier.arrive.drop``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %count)
+ declare i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %count)
+
+ declare i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %count)
+ declare i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %count)
+ declare void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %count)
+
+Overview:
+"""""""""
+
+The ``@llvm.nvvm.mbarrier.arrive.drop.*`` intrinsics decrement the
+expected arrival count of the mbarrier object at ``%addr`` by
+``%count`` and then perform an ``arrive`` operation with ``%count``.
+The ``%count`` is a 32-bit integer.
+
+Semantics:
+""""""""""
+
+The semantics of these intrinsics are identical to those of the
+``llvm.nvvm.mbarrier.arrive.*`` intrinsics described above.
+
+'``llvm.nvvm.mbarrier.arrive.drop.expect.tx``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+
+ declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+ declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %addr, i32 %tx_count)
+
+Overview:
+"""""""""
+
+The ``@llvm.nvvm.mbarrier.arrive.drop.expect.tx.*`` intrinsics perform
+the below operations on the mbarrier located at ``%addr``.
+
+* Perform an ``expect-tx`` operation i.e. increase the transaction count
+ of the mbarrier by ``%tx_count``, a 32-bit unsigned integer value.
+* Decrement the expected arrival count of the mbarrier by 1.
+* Perform an ``arrive`` operation on the mbarrier with a value of 1.
+
+Semantics:
+""""""""""
+
+The semantics of these intrinsics are identical to those of the
+``llvm.nvvm.mbarrier.arrive.*`` intrinsics described above.
+
+'``llvm.nvvm.mbarrier.test.wait``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare i1 @llvm.nvvm.mbarrier.test.wait.scope.cta.space.cta(ptr addrspace(3) %addr, i64 %state)
+ declare i1 @llvm.nvvm.mbarrier.test.wait.scope.cluster.space.cta(ptr addrspace(3) %addr, i64 %state)
+ declare i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %phase)
+ declare i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %phase)
+
+ declare i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cta.space.cta(ptr addrspace(3) %addr, i64 %state)
+ declare i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3) %addr, i64 %state)
+ declare i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %phase)
+ declare i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %phase)
+
+Overview:
+"""""""""
+
+The ``@llvm.nvvm.mbarrier.test.wait.*`` intrinsics test for the completion
+of the current or the immediately preceding phase of an mbarrier object at
+``%addr``. The test for completion can be done with either the ``state`` or
+the ``phase-parity`` of the mbarrier object.
+
+* When done through the ``i64 %state`` operand, the state must be
+ returned by an ``llvm.nvvm.mbarrier.arrive.*`` on the _same_
+ mbarrier object.
+* The ``.parity`` variant of these intrinsics test for completion
+ of the phase indicated by the operand ``i32 %phase``, which is
+ the integer parity of either the current phase or the immediately
+ preceding phase of the mbarrier object. An even phase has integer
+ parity 0 and an odd phase has integer parity of 1. So the valid
+ values for phase-parity are 0 and 1.
+
+Semantics:
+""""""""""
+
+The ``.scope.{cta/cluster}`` denotes the set of threads that the
+test_wait operation can directly synchronize with.
+
+If the ``addr`` does not fall within shared::cta space, then the
+the behavior of this intrinsic is undefined.
+
+These intrinsics have ``acquire`` semantics by default. This acquire
+pattern establishes memory ordering for operations occurring in program
+order after this ``test_wait`` instruction by making operations from
+other threads in the CTA (or cluster, depending on scope) visible to
+subsequent operations in the current thread. When this wait completes,
+it synchronizes with the corresponding release pattern from the
+``mbarrier.arrive`` operation. The ``relaxed`` variants of these intrinsics
+do not provide any memory ordering or visibility guarantees.
+
+This ``test.wait`` intrinsic is non-blocking and immediately returns
+the completion status without suspending the executing thread.
+
+The boolean return value indicates:
+
+* True: The immediately preceding phase has completed
+* False: The current phase is still incomplete
+
+When this wait returns true, the following ordering guarantees hold:
+
+* All memory accesses (except async operations) requested prior to
+ ``mbarrier.arrive`` having release semantics by participating
+ threads of a CTA (or cluster, depending on scope) are visible to
+ the executing thread.
+* All ``cp.async`` operations requested prior to ``cp.async.mbarrier.arrive``
+ by participating threads of a CTA are visible to the executing thread.
+* All ``cp.async.bulk`` operations using the same mbarrier object requested
+ prior to ``mbarrier.arrive`` having release semantics by participating CTA
+ threads are visible to the executing thread.
+* Memory accesses requested after this wait are not visible to memory
+ accesses performed prior to ``mbarrier.arrive`` by other participating
+ threads.
+* No ordering guarantee exists for memory accesses by the same thread
+ between an ``mbarrier.arrive`` and this wait.
+
+'``llvm.nvvm.mbarrier.try.wait``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare i1 @llvm.nvvm.mbarrier.try.wait{.relaxed}.scope.cta.space.cta(ptr addrspace(3) %addr, i64 %state)
+ declare i1 @llvm.nvvm.mbarrier.try.wait{.relaxed}.scope.cluster.space.cta(ptr addrspace(3) %addr, i64 %state)
+
+ declare i1 @llvm.nvvm.mbarrier.try.wait.parity{.relaxed}.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %phase)
+ declare i1 @llvm.nvvm.mbarrier.try.wait.parity{.relaxed}.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %phase)
+
+ declare i1 @llvm.nvvm.mbarrier.try.wait.tl{.relaxed}.scope.cta.space.cta(ptr addrspace(3) %addr, i64 %state, i32 %timelimit)
+ declare i1 @llvm.nvvm.mbarrier.try.wait.tl{.relaxed}.scope.cluster.space.cta(ptr addrspace(3) %addr, i64 %state, i32 %timelimit)
+
+ declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl{.relaxed}.scope.cta.space.cta(ptr addrspace(3) %addr, i32 %phase, i32 %timelimit)
+ declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl{.relaxed}.scope.cluster.space.cta(ptr addrspace(3) %addr, i32 %phase, i32 %timelimit)
+
+Overview:
+"""""""""
+
+The ``@llvm.nvvm.mbarrier.try.wait.*`` intrinsics test for the completion of
+the current or immediately preceding phase of an mbarrier object at ``%addr``.
+Unlike the ``test.wait`` intrinsics, which perform a non-blocking test, these
+intrinsics may block the executing thread until the specified phase completes
+or a system-dependent time limit expires. Suspended threads resume execution
+when the phase completes or the time limit elapses. This time limit is
+configurable through the ``.tl`` variants of these intrinsics, where the
+``%timelimit`` operand (an unsigned integer) specifies the limit in
+nanoseconds. Other semantics are identical to those of the ``test.wait``
+intrinsics described above.
+
Electing a thread
-----------------
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index c9df6c4..719181a 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1866,6 +1866,73 @@ let IntrProperties = [IntrConvergent, IntrNoCallback] in {
def int_nvvm_mbarrier_pending_count : NVVMBuiltin,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent, IntrNoCallback]>;
+// mbarrier.{expect_tx/complete_tx}
+foreach op = ["expect_tx", "complete_tx"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ foreach space = ["space_cta", "space_cluster"] in {
+ defvar suffix = StrJoin<"_", [op, scope, space]>.ret;
+ defvar mbar_addr_ty = !if(!eq(space, "space_cta"),
+ llvm_shared_ptr_ty, llvm_shared_cluster_ptr_ty);
+
+ def int_nvvm_mbarrier_ # suffix :
+ Intrinsic<[], [mbar_addr_ty, llvm_i32_ty],
+ [IntrConvergent, IntrArgMemOnly, IntrNoCallback]>;
+ } // space
+ } // scope
+} // op
+
+// mbarrier.arrive and mbarrier.arrive.expect_tx
+// mbarrier.arrive_drop and mbarrier.arrive_drop.expect_tx
+foreach op = ["arrive", "arrive_expect_tx",
+ "arrive_drop", "arrive_drop_expect_tx"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ foreach space = ["space_cta", "space_cluster"] in {
+ defvar suffix = StrJoin<"_", [scope, space]>.ret;
+ defvar mbar_addr_ty = !if(!eq(space, "space_cta"),
+ llvm_shared_ptr_ty, llvm_shared_cluster_ptr_ty);
+ defvar args_ty = [mbar_addr_ty, // mbar_address_ptr
+ llvm_i32_ty]; // tx-count
+
+ // mbarriers in shared_cluster space cannot return any value.
+ defvar mbar_ret_ty = !if(!eq(space, "space_cta"),
+ [llvm_i64_ty], []<LLVMType>);
+
+ def int_nvvm_mbarrier_ # op # "_" # suffix:
+ Intrinsic<mbar_ret_ty, args_ty,
+ [IntrConvergent, IntrNoCallback]>;
+ def int_nvvm_mbarrier_ # op # "_relaxed_" # suffix :
+ Intrinsic<mbar_ret_ty, args_ty,
+ [IntrConvergent, IntrArgMemOnly, IntrNoCallback]>;
+ } // space
+ } // scope
+} // op
+
+// mbarrier.{test_wait and try_wait}
+foreach op = ["test_wait", "try_wait"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ foreach parity = [true, false] in {
+ foreach time_limit = !if(!eq(op, "try_wait"), [true, false], [false]) in {
+ defvar base_args = [llvm_shared_ptr_ty]; // mbar_ptr
+ defvar parity_args = !if(parity, [llvm_i32_ty], [llvm_i64_ty]);
+ defvar tl_args = !if(time_limit, [llvm_i32_ty], []<LLVMType>);
+ defvar args = !listconcat(base_args, parity_args, tl_args);
+ defvar tmp_op = StrJoin<"_", [op,
+ !if(parity, "parity", ""),
+ !if(time_limit, "tl", "")]>.ret;
+ defvar suffix = StrJoin<"_", [scope, "space_cta"]>.ret;
+
+ def int_nvvm_mbarrier_ # tmp_op # "_" # suffix :
+ Intrinsic<[llvm_i1_ty], args,
+ [IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback]>;
+ def int_nvvm_mbarrier_ # tmp_op # "_relaxed_" # suffix :
+ Intrinsic<[llvm_i1_ty], args,
+ [IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback,
+ IntrArgMemOnly, IntrReadMem]>;
+ } // tl
+ } // parity
+ } // scope
+} // op
+
// Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the
// pointer's alignment.
let IntrProperties = [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>] in {
@@ -3000,4 +3067,4 @@ foreach sp = [0, 1] in {
}
}
-} // let TargetPrefix = "nvvm" \ No newline at end of file
+} // let TargetPrefix = "nvvm"
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index 860f73c..cb8e568de 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -170,6 +170,10 @@ private:
RadixTree<iterator_range<StringRef::const_reverse_iterator>,
SmallVector<const GlobMatcher::Glob *, 1>>>
PrefixSuffixToGlob;
+
+ RadixTree<iterator_range<StringRef::const_iterator>,
+ SmallVector<const GlobMatcher::Glob *, 1>>
+ SubstrToGlob;
};
/// Represents a set of patterns and their line numbers
diff --git a/llvm/include/llvm/TargetParser/ARMTargetParser.def b/llvm/include/llvm/TargetParser/ARMTargetParser.def
index 0ada2e7..27dbfcaf 100644
--- a/llvm/include/llvm/TargetParser/ARMTargetParser.def
+++ b/llvm/include/llvm/TargetParser/ARMTargetParser.def
@@ -357,6 +357,9 @@ ARM_CPU_NAME("cortex-m85", ARMV8_1MMainline, FK_FP_ARMV8_FULLFP16_D16, false,
ARM_CPU_NAME("cortex-m52", ARMV8_1MMainline, FK_FP_ARMV8_FULLFP16_D16, false,
(ARM::AEK_DSP | ARM::AEK_MVE | ARM::AEK_FP | ARM::AEK_FP16 |
ARM::AEK_RAS | ARM::AEK_PACBTI))
+ARM_CPU_NAME("star-mc3", ARMV8_1MMainline, FK_FP_ARMV8_FULLFP16_D16, false,
+ (ARM::AEK_DSP | ARM::AEK_MVE | ARM::AEK_FP | ARM::AEK_FP16 |
+ ARM::AEK_RAS | ARM::AEK_PACBTI))
ARM_CPU_NAME("cortex-a32", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("cortex-a35", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8676060..cf221bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16481,10 +16481,34 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
DAG, DL);
}
break;
- case ISD::AVGFLOORS:
- case ISD::AVGFLOORU:
case ISD::AVGCEILS:
case ISD::AVGCEILU:
+ // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
+ // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
+ if (N0.hasOneUse()) {
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N0.getOperand(1);
+ if (N0.getOpcode() == ISD::AVGCEILU) {
+ if (TLI.isOperationLegalOrCustom(ISD::AVGCEILS, VT) &&
+ Op0.getOpcode() == ISD::SIGN_EXTEND &&
+ Op1.getOpcode() == ISD::SIGN_EXTEND &&
+ Op0.getOperand(0).getValueType() == VT &&
+ Op1.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::AVGCEILS, DL, VT, Op0.getOperand(0),
+ Op1.getOperand(0));
+ } else {
+ if (TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT) &&
+ Op0.getOpcode() == ISD::ZERO_EXTEND &&
+ Op1.getOpcode() == ISD::ZERO_EXTEND &&
+ Op0.getOperand(0).getValueType() == VT &&
+ Op1.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::AVGCEILU, DL, VT, Op0.getOperand(0),
+ Op1.getOperand(0));
+ }
+ }
+ [[fallthrough]];
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
case ISD::ABDS:
case ISD::ABDU:
// (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 3a97185..246d90c 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -94,6 +94,19 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
StringRef Prefix = G.Pattern.prefix();
StringRef Suffix = G.Pattern.suffix();
+ if (Suffix.empty() && Prefix.empty()) {
+ // If both prefix and suffix are empty put into special tree to search by
+ // substring in a middle.
+ StringRef Substr = G.Pattern.longest_substr();
+ if (!Substr.empty()) {
+ // But only if substring is not empty. Searching this tree is more
+ // expensive.
+ auto &V = SubstrToGlob.emplace(Substr).first->second;
+ V.emplace_back(&G);
+ continue;
+ }
+ }
+
auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
V.emplace_back(&G);
@@ -119,6 +132,25 @@ void SpecialCaseList::GlobMatcher::match(
}
}
}
+
+ if (!SubstrToGlob.empty()) {
+ // As we don't know when substring exactly starts, we will try all
+ // possibilities. In most cases search will fail on first characters.
+ for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
+ for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
+ for (const auto *G : V) {
+ if (G->Pattern.match(Query)) {
+ Cb(G->Name, G->LineNo);
+ // As soon as we find a match in the vector, we can break for this
+ // vector, since the globs are already sorted by priority within the
+ // prefix group. However, we continue searching other prefix groups
+ // in the map, as they may contain a better match overall.
+ break;
+ }
+ }
+ }
+ }
+ }
}
SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 5c3e26e..4cd51d6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1114,7 +1114,6 @@ bool AArch64InstPrinter::printSyslAlias(const MCInst *MI,
} else
return false;
- std::string Str;
llvm::transform(Name, Name.begin(), ::tolower);
O << '\t' << Ins << '\t' << Reg.str() << ", " << Name;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1c8383c..54d94b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1466,6 +1466,13 @@ def FeatureClusters : SubtargetFeature< "clusters",
"Has clusters of workgroups support"
>;
+def FeatureWaitsBeforeSystemScopeStores : SubtargetFeature<
+ "waits-before-system-scope-stores",
+ "RequiresWaitsBeforeSystemScopeStores",
+ "true",
+ "Target requires waits for loads and atomics before system scope stores"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -2060,7 +2067,8 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMaxHardClauseLength32,
Feature1_5xVGPRs,
FeatureMemoryAtomicFAddF32DenormalSupport,
- FeatureBVHDualAndBVH8Insts
+ FeatureBVHDualAndBVH8Insts,
+ FeatureWaitsBeforeSystemScopeStores,
]>;
def FeatureISAVersion12_50 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ac660d5..f377b8a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -290,6 +290,7 @@ protected:
bool Has45BitNumRecordsBufferResource = false;
bool HasClusters = false;
+ bool RequiresWaitsBeforeSystemScopeStores = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1861,6 +1862,10 @@ public:
bool has45BitNumRecordsBufferResource() const {
return Has45BitNumRecordsBufferResource;
}
+
+ bool requiresWaitsBeforeSystemScopeStores() const {
+ return RequiresWaitsBeforeSystemScopeStores;
+ }
};
class GCNUserSGPRUsageInfo {
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index a177a42..6ab8d552 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2673,7 +2673,8 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
const unsigned Scope = CPol->getImm() & CPol::SCOPE;
// GFX12.0 only: Extra waits needed before system scope stores.
- if (!ST.hasGFX1250Insts() && !Atomic && Scope == CPol::SCOPE_SYS)
+ if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
+ Scope == CPol::SCOPE_SYS)
Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());
return Changed;
diff --git a/llvm/lib/Target/ARM/ARMProcessors.td b/llvm/lib/Target/ARM/ARMProcessors.td
index 7453727..b60569e 100644
--- a/llvm/lib/Target/ARM/ARMProcessors.td
+++ b/llvm/lib/Target/ARM/ARMProcessors.td
@@ -421,6 +421,17 @@ def : ProcessorModel<"cortex-m52", CortexM55Model, [ARMv81mMainline,
FeatureMVEVectorCostFactor1,
HasMVEFloatOps]>;
+def : ProcessorModel<"star-mc3", CortexM55Model, [ARMv81mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16,
+ FeatureHasNoBranchPredictor,
+ FeaturePACBTI,
+ FeatureUseMISched,
+ FeaturePreferBranchAlign32,
+ FeatureHasSlowFPVMLx,
+ FeatureMVEVectorCostFactor1,
+ HasMVEFloatOps]>;
+
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
FeatureHWDivARM,
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ca4a655..80c96c6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1701,6 +1701,43 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
DAG.getConstant(Imm, DL, GRLenVT));
}
+/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
+///
+/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
+/// reverse whose mask likes:
+/// <7, 6, 5, 4, 3, 2, 1, 0>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue
+lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ // Only vectors with i8/i16 elements which cannot match other patterns
+ // directly needs to do this.
+ if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
+ VT != MVT::v16i16)
+ return SDValue();
+
+ if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
+ return SDValue();
+
+ int WidenNumElts = VT.getVectorNumElements() / 4;
+ SmallVector<int, 16> WidenMask(WidenNumElts, -1);
+ for (int i = 0; i < WidenNumElts; ++i)
+ WidenMask[i] = WidenNumElts - 1 - i;
+
+ MVT WidenVT = MVT::getVectorVT(
+ VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
+ SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
+ SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
+ DAG.getUNDEF(WidenVT), WidenMask);
+
+ return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
+ DAG.getBitcast(VT, WidenRev),
+ DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
+}
+
/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
@@ -2004,6 +2041,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result =
lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
+ return Result;
// TODO: This comment may be enabled in the future to better match the
// pattern for instruction selection.
@@ -2622,6 +2662,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
+ return Result;
// TODO: This comment may be enabled in the future to better match the
// pattern for instruction selection.
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 598735f..c923f0e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1082,6 +1082,161 @@ let Predicates = [hasPTX<70>, hasSM<80>] in {
"mbarrier.pending_count.b64",
[(set i32:$res, (int_nvvm_mbarrier_pending_count i64:$state))]>;
}
+
+class MBAR_UTIL<string op, string scope,
+ string space = "", string sem = "",
+ bit tl = 0, bit parity = 0> {
+ // The mbarrier instructions in PTX ISA are of the general form:
+ // mbarrier.op.semantics.scope.space.b64 arg1, arg2 ...
+ // where:
+ // op -> arrive, expect_tx, complete_tx, arrive.expect_tx etc.
+ // semantics -> acquire, release, relaxed (default depends on the op)
+ // scope -> cta or cluster (default is cta-scope)
+ // space -> shared::cta or shared::cluster (default is shared::cta)
+ //
+ // The 'semantics' and 'scope' go together. If one is specified,
+ // then the other _must_ be specified. For example:
+ // (A) mbarrier.arrive <args> (valid, release and cta are default)
+ // (B) mbarrier.arrive.release.cta <args> (valid, sem/scope mentioned explicitly)
+ // (C) mbarrier.arrive.release <args> (invalid, needs scope)
+ // (D) mbarrier.arrive.cta <args> (invalid, needs order)
+ //
+ // Wherever possible, we prefer form (A) to (B) since it is available
+ // from early PTX versions. In most cases, explicitly specifying the
+ // scope requires a later version of PTX.
+ string _scope_asm = !cond(
+ !eq(scope, "scope_cluster") : "cluster",
+ !eq(scope, "scope_cta") : !if(!empty(sem), "", "cta"),
+ true : scope);
+ string _space_asm = !cond(
+ !eq(space, "space_cta") : "shared",
+ !eq(space, "space_cluster") : "shared::cluster",
+ true : space);
+
+ string _parity = !if(parity, "parity", "");
+ string asm_str = StrJoin<".", ["mbarrier", op, _parity,
+ sem, _scope_asm, _space_asm, "b64"]>.ret;
+
+ string _intr_suffix = StrJoin<"_", [!subst(".", "_", op), _parity,
+ !if(tl, "tl", ""),
+ sem, scope, space]>.ret;
+ string intr_name = "int_nvvm_mbarrier_" # _intr_suffix;
+
+ // Predicate checks:
+ // These are used only for the "test_wait/try_wait" variants as they
+ // have evolved since sm80 and are complex. The predicates for the
+ // remaining instructions are straightforward and have already been
+ // applied directly.
+ Predicate _sm_pred = !cond(!or(
+ !eq(op, "try_wait"),
+ !eq(scope, "scope_cluster"),
+ !eq(sem, "relaxed")) : hasSM<90>,
+ true : hasSM<80>);
+ Predicate _ptx_pred = !cond(
+ !eq(sem, "relaxed") : hasPTX<86>,
+ !ne(_scope_asm, "") : hasPTX<80>,
+ !eq(op, "try_wait") : hasPTX<78>,
+ parity : hasPTX<71>,
+ true : hasPTX<70>);
+ list<Predicate> preds = [_ptx_pred, _sm_pred];
+}
+
+foreach op = ["expect_tx", "complete_tx"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ foreach space = ["space_cta", "space_cluster"] in {
+ defvar intr = !cast<Intrinsic>(MBAR_UTIL<op, scope, space>.intr_name);
+ defvar suffix = StrJoin<"_", [op, scope, space]>.ret;
+ def mbar_ # suffix : BasicNVPTXInst<(outs), (ins ADDR:$addr, B32:$tx_count),
+ MBAR_UTIL<op, scope, space, "relaxed">.asm_str,
+ [(intr addr:$addr, i32:$tx_count)]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ } // space
+ } // scope
+} // op
+
+multiclass MBAR_ARR_INTR<string op, string scope, string sem,
+ list<Predicate> pred = []> {
+ // When either of sem or scope is non-default, both have to
+ // be explicitly specified. So, explicitly state that
+ // sem is `release` when scope is `cluster`.
+ defvar asm_sem = !if(!and(!empty(sem), !eq(scope, "scope_cluster")),
+ "release", sem);
+
+ defvar asm_cta = MBAR_UTIL<op, scope, "space_cta", asm_sem>.asm_str;
+ defvar intr_cta = !cast<Intrinsic>(MBAR_UTIL<op, scope,
+ "space_cta", sem>.intr_name);
+
+ defvar asm_cluster = MBAR_UTIL<op, scope, "space_cluster", asm_sem>.asm_str;
+ defvar intr_cluster = !cast<Intrinsic>(MBAR_UTIL<op, scope,
+ "space_cluster", sem>.intr_name);
+
+ def _CTA : NVPTXInst<(outs B64:$state),
+ (ins ADDR:$addr, B32:$tx_count),
+ asm_cta # " $state, [$addr], $tx_count;",
+ [(set i64:$state, (intr_cta addr:$addr, i32:$tx_count))]>,
+ Requires<pred>;
+ def _CLUSTER : NVPTXInst<(outs),
+ (ins ADDR:$addr, B32:$tx_count),
+ asm_cluster # " _, [$addr], $tx_count;",
+ [(intr_cluster addr:$addr, i32:$tx_count)]>,
+ Requires<pred>;
+}
+foreach op = ["arrive", "arrive.expect_tx",
+ "arrive_drop", "arrive_drop.expect_tx"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ defvar suffix = !subst(".", "_", op) # scope;
+ defm mbar_ # suffix # _release : MBAR_ARR_INTR<op, scope, "", [hasPTX<80>, hasSM<90>]>;
+ defm mbar_ # suffix # _relaxed : MBAR_ARR_INTR<op, scope, "relaxed", [hasPTX<86>, hasSM<90>]>;
+ } // scope
+} // op
+
+multiclass MBAR_WAIT_INTR<string op, string scope, string sem, bit time_limit> {
+ // When either of sem or scope is non-default, both have to
+ // be explicitly specified. So, explicitly state that the
+ // semantics is `acquire` when the scope is `cluster`.
+ defvar asm_sem = !if(!and(!empty(sem), !eq(scope, "scope_cluster")),
+ "acquire", sem);
+
+ defvar asm_parity = MBAR_UTIL<op, scope, "space_cta", asm_sem,
+ time_limit, 1>.asm_str;
+ defvar pred_parity = MBAR_UTIL<op, scope, "space_cta", asm_sem,
+ time_limit, 1>.preds;
+ defvar intr_parity = !cast<Intrinsic>(MBAR_UTIL<op, scope, "space_cta",
+ sem, time_limit, 1>.intr_name);
+
+ defvar asm_state = MBAR_UTIL<op, scope, "space_cta", asm_sem,
+ time_limit>.asm_str;
+ defvar pred_state = MBAR_UTIL<op, scope, "space_cta", asm_sem,
+ time_limit>.preds;
+ defvar intr_state = !cast<Intrinsic>(MBAR_UTIL<op, scope, "space_cta",
+ sem, time_limit>.intr_name);
+
+ defvar ins_tl_dag = !if(time_limit, (ins B32:$tl), (ins));
+ defvar tl_suffix = !if(time_limit, ", $tl;", ";");
+ defvar intr_state_dag = !con((intr_state addr:$addr, i64:$state),
+ !if(time_limit, (intr_state i32:$tl), (intr_state)));
+ defvar intr_parity_dag = !con((intr_parity addr:$addr, i32:$phase),
+ !if(time_limit, (intr_parity i32:$tl), (intr_parity)));
+
+ def _STATE : NVPTXInst<(outs B1:$res), !con((ins ADDR:$addr, B64:$state), ins_tl_dag),
+ asm_state # " $res, [$addr], $state" # tl_suffix,
+ [(set i1:$res, intr_state_dag)]>,
+ Requires<pred_state>;
+ def _PARITY : NVPTXInst<(outs B1:$res), !con((ins ADDR:$addr, B32:$phase), ins_tl_dag),
+ asm_parity # " $res, [$addr], $phase" # tl_suffix,
+ [(set i1:$res, intr_parity_dag)]>,
+ Requires<pred_parity>;
+}
+foreach op = ["test_wait", "try_wait"] in {
+ foreach scope = ["scope_cta", "scope_cluster"] in {
+ foreach time_limit = !if(!eq(op, "try_wait"), [true, false], [false]) in {
+ defvar suffix = StrJoin<"_", [op, scope, !if(time_limit, "tl", "")]>.ret;
+ defm mbar_ # suffix # "_acquire" : MBAR_WAIT_INTR<op, scope, "", time_limit>;
+ defm mbar_ # suffix # "_relaxed" : MBAR_WAIT_INTR<op, scope, "relaxed", time_limit>;
+ } // time_limit
+ } // scope
+} // op
+
//-----------------------------------
// Math Functions
//-----------------------------------
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 92af04a..4695a6f 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -825,8 +825,7 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
SRADI_rec,
- SRAWI_rec,
- SRAWI8_rec
+ SRAWI8_rec, SRAWI_rec
)>;
// Single crack instructions
@@ -834,8 +833,7 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
- SRAW_rec,
- SRAW8_rec
+ SRAW8_rec, SRAW_rec
)>;
// 2-way crack instructions
@@ -883,7 +881,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
// 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
(instrs
- ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, ADDItocL8, LI, LI8,
+ ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
ADDIC, ADDIC8,
ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
ADDME, ADDME8,
@@ -1864,7 +1862,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read]
(instrs
CP_PASTE8_rec, CP_PASTE_rec,
SLBIEG,
- TLBIE
+ TLBIE, TLBIE8P9, TLBIEP9
)>;
// Single crack instructions
@@ -1886,8 +1884,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read,
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
ISYNC,
- SYNCP10,
- SYNC
+ SYNC, SYNCP10
)>;
// Expand instructions
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 801ae83..3f5f7d3 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -905,7 +905,7 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
SLBIEG,
STMW,
STSWI,
- TLBIE
+ TLBIE, TLBIEP9, TLBIE8P9
)>;
// Vector Store Instruction
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 4ff2f8a..5d9ec4a 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -409,6 +409,7 @@ def HasP10Vector : Predicate<"Subtarget->hasP10Vector()">;
def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">;
def IsISA2_07 : Predicate<"Subtarget->isISA2_07()">;
def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
+def IsNotISA3_0 : Predicate<"!Subtarget->isISA3_0()">;
def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
def IsNotISA3_1 : Predicate<"!Subtarget->isISA3_1()">;
def IsISAFuture : Predicate<"Subtarget->isISAFuture()">;
diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
index 6bb66bc..043c9e4 100644
--- a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -29,7 +29,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
ADDIStocHA8,
ADDIdtprelL32,
ADDItlsldLADDR32,
- ADDItocL8,
+ ADDItocL,
ADDME,
ADDME8,
ADDME8O,
@@ -209,7 +209,9 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
SRADI,
SRADI_32,
SRAW,
+ SRAW8,
SRAWI,
+ SRAWI8,
SRD,
SRD_rec,
SRW,
@@ -518,7 +520,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
ADDIStocHA8,
ADDIdtprelL32,
ADDItlsldLADDR32,
- ADDItocL8,
+ ADDItocL,
ADDME,
ADDME8,
ADDME8O,
@@ -747,7 +749,9 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
SRADI,
SRADI_32,
SRAW,
+ SRAW8,
SRAWI,
+ SRAWI8,
SRD,
SRD_rec,
SRW,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index fba1c66..98c5f09 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -850,6 +850,26 @@ class XForm_45<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_RSB5_UIMM2_2UIMM1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, NoItinerary> {
+
+ bits<5> RS;
+ bits<5> RB;
+ bits<2> RIC;
+ bits<1> PRS;
+ bits<1> R;
+
+ let Pattern = pattern;
+
+ let Inst{6...10} = RS;
+ let Inst{12...13} = RIC;
+ let Inst{14} = PRS;
+ let Inst{15} = R;
+ let Inst{16...20} = RB;
+ let Inst{21...30} = xo;
+}
+
class X_FRT5_XO2_XO3_XO10<bits<6> opcode, bits<2> xo1, bits<3> xo2, bits<10> xo,
dag OOL, dag IOL, string asmstr, InstrItinClass itin,
list<dag> pattern>
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 44d1a44..f399811 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -4321,7 +4321,22 @@ def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB),
"tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RST, gprc:$RB),
- "tlbie $RB,$RST", IIC_SprTLBIE, []>;
+ "tlbie $RB, $RST", IIC_SprTLBIE, []>,
+ Requires<[IsNotISA3_0]>;
+
+let Predicates = [IsISA3_0] in {
+ def TLBIEP9 : XForm_RSB5_UIMM2_2UIMM1<31, 306, (outs),
+ (ins gprc:$RB, gprc:$RS, u2imm:$RIC,
+ u1imm:$PRS, u1imm:$R),
+ "tlbie $RB, $RS, $RIC, $PRS, $R", []>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def TLBIE8P9
+ : XForm_RSB5_UIMM2_2UIMM1<31, 306, (outs),
+ (ins g8rc:$RB, g8rc:$RS, u2imm:$RIC,
+ u1imm:$PRS, u1imm:$R),
+ "tlbie $RB, $RS, $RIC, $PRS, $R", []>;
+ }
+}
def TLBSX : XForm_tlb<914, (outs), (ins gprc:$RA, gprc:$RB), "tlbsx $RA, $RB",
IIC_LdStLoad>, Requires<[IsBookE]>;
@@ -4669,7 +4684,11 @@ def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
}
-def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>, Requires<[IsNotISA3_0]>;
+let Predicates = [IsISA3_0] in {
+ def : InstAlias<"tlbie $RB", (TLBIEP9 R0, gprc:$RB, 0, 0, 0)>;
+ def : InstAlias<"tlbie $RB, $RS", (TLBIEP9 gprc:$RB, gprc:$RS, 0, 0, 0)>;
+}
def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>,
Requires<[IsPPC4xx]>;
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 706ab2b..51b540a 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -14,7 +14,10 @@
#ifndef LLVM_LIB_TARGET_X86_X86_H
#define LLVM_LIB_TARGET_X86_X86_H
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -162,7 +165,17 @@ FunctionPass *createX86WinEHUnwindV2Pass();
/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics
/// or split the data to two <128 x i32>.
-FunctionPass *createX86LowerAMXTypePass();
+class X86LowerAMXTypePass : public PassInfoMixin<X86LowerAMXTypePass> {
+private:
+ const TargetMachine *TM;
+
+public:
+ X86LowerAMXTypePass(const TargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ static bool isRequired() { return true; }
+};
+
+FunctionPass *createX86LowerAMXTypeLegacyPass();
/// The pass transforms amx intrinsics to scalar operation if the function has
/// optnone attribute or it is O0.
diff --git a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
index d979517..2c0443d 100644
--- a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
@@ -10,6 +10,7 @@
/// TODO: Port CodeGen passes to new pass manager.
//===----------------------------------------------------------------------===//
+#include "X86.h"
#include "X86ISelDAGToDAG.h"
#include "X86TargetMachine.h"
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 0ba71ad..8ffd454 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -46,12 +46,14 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -64,7 +66,7 @@
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "lower-amx-type"
+#define DEBUG_TYPE "x86-lower-amx-type"
static bool isAMXCast(Instruction *II) {
return match(II,
@@ -137,7 +139,7 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
class ShapeCalculator {
private:
- TargetMachine *TM = nullptr;
+ const TargetMachine *TM = nullptr;
// In AMX intrinsics we let Shape = {Row, Col}, but the
// RealCol = Col / ElementSize. We may use the RealCol
@@ -145,7 +147,7 @@ private:
std::map<Value *, Value *> Col2Row, Row2Col;
public:
- ShapeCalculator(TargetMachine *TargetM) : TM(TargetM) {}
+ ShapeCalculator(const TargetMachine *TargetM) : TM(TargetM) {}
std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
std::pair<Value *, Value *> getShape(PHINode *Phi);
Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
@@ -1432,8 +1434,58 @@ bool X86LowerAMXCast::transformAllAMXCast() {
return Change;
}
+bool lowerAmxType(Function &F, const TargetMachine *TM,
+ TargetLibraryInfo *TLI) {
+ // Performance optimization: most code doesn't use AMX, so return early if
+ // there are no instructions that produce AMX values. This is sufficient, as
+ // AMX arguments and constants are not allowed -- so any producer of an AMX
+ // value must be an instruction.
+ // TODO: find a cheaper way for this, without looking at all instructions.
+ if (!containsAMXCode(F))
+ return false;
+
+ bool C = false;
+ ShapeCalculator SC(TM);
+ X86LowerAMXCast LAC(F, &SC);
+ C |= LAC.combineAMXcast(TLI);
+ // There might be remaining AMXcast after combineAMXcast and they should be
+ // handled elegantly.
+ C |= LAC.transformAllAMXCast();
+
+ X86LowerAMXType LAT(F, &SC);
+ C |= LAT.visit();
+
+ // Prepare for fast register allocation at O0.
+ // Todo: May better check the volatile model of AMX code, not just
+ // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
+ if (TM->getOptLevel() == CodeGenOptLevel::None) {
+ // If Front End not use O0 but the Mid/Back end use O0, (e.g.
+ // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
+ // sure the amx data is volatile, that is necessary for AMX fast
+ // register allocation.
+ if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
+ X86VolatileTileData VTD(F);
+ C = VTD.volatileTileData() || C;
+ }
+ }
+
+ return C;
+}
+
} // anonymous namespace
+PreservedAnalyses X86LowerAMXTypePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ bool Changed = lowerAmxType(F, TM, &TLI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
namespace {
class X86LowerAMXTypeLegacyPass : public FunctionPass {
@@ -1443,44 +1495,10 @@ public:
X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override {
- // Performance optimization: most code doesn't use AMX, so return early if
- // there are no instructions that produce AMX values. This is sufficient, as
- // AMX arguments and constants are not allowed -- so any producer of an AMX
- // value must be an instruction.
- // TODO: find a cheaper way for this, without looking at all instructions.
- if (!containsAMXCode(F))
- return false;
-
- bool C = false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
- ShapeCalculator SC(TM);
- X86LowerAMXCast LAC(F, &SC);
- C |= LAC.combineAMXcast(TLI);
- // There might be remaining AMXcast after combineAMXcast and they should be
- // handled elegantly.
- C |= LAC.transformAllAMXCast();
-
- X86LowerAMXType LAT(F, &SC);
- C |= LAT.visit();
-
- // Prepare for fast register allocation at O0.
- // Todo: May better check the volatile model of AMX code, not just
- // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
- if (TM->getOptLevel() == CodeGenOptLevel::None) {
- // If Front End not use O0 but the Mid/Back end use O0, (e.g.
- // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
- // sure the amx data is volatile, that is nessary for AMX fast
- // register allocation.
- if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
- X86VolatileTileData VTD(F);
- C = VTD.volatileTileData() || C;
- }
- }
-
- return C;
+ return lowerAmxType(F, TM, TLI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1501,6 +1519,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
false)
-FunctionPass *llvm::createX86LowerAMXTypePass() {
+FunctionPass *llvm::createX86LowerAMXTypeLegacyPass() {
return new X86LowerAMXTypeLegacyPass();
}
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 3f2a433..fc25d55 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -12,11 +12,16 @@
// NOTE: NO INCLUDE GUARD DESIRED!
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this))
+#undef FUNCTION_PASS
+
#ifndef DUMMY_FUNCTION_PASS
#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
-DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this))
DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
#undef DUMMY_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 8dd6f3d..9a76abc 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -423,7 +423,7 @@ void X86PassConfig::addIRPasses() {
// We add both pass anyway and when these two passes run, we skip the pass
// based on the option level and option attribute.
addPass(createX86LowerAMXIntrinsicsPass());
- addPass(createX86LowerAMXTypePass());
+ addPass(createX86LowerAMXTypeLegacyPass());
TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 6065575..c8d1938 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -369,6 +369,7 @@ getHostCPUNameForARMFromComponents(StringRef Implementer, StringRef Hardware,
if (Implementer == "0x63") { // Arm China.
return StringSwitch<const char *>(Part)
.Case("0x132", "star-mc1")
+ .Case("0xd25", "star-mc3")
.Default("generic");
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9c8de45..67f837c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3358,21 +3358,21 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (TyAllocSize == 1) {
// Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
- // but only if the result pointer is only used as if it were an integer,
- // or both point to the same underlying object (otherwise provenance is
- // not necessarily retained).
+ // but only if the result pointer is only used as if it were an integer.
+ // (The case where the underlying object is the same is handled by
+ // InstSimplify.)
Value *X = GEP.getPointerOperand();
Value *Y;
- if (match(GEP.getOperand(1),
- m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
+ if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)),
+ m_PtrToIntOrAddr(m_Specific(X)))) &&
GEPType == Y->getType()) {
- bool HasSameUnderlyingObject =
- getUnderlyingObject(X) == getUnderlyingObject(Y);
+ bool HasNonAddressBits =
+ DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
bool Changed = false;
GEP.replaceUsesWithIf(Y, [&](Use &U) {
- bool ShouldReplace = HasSameUnderlyingObject ||
- isa<ICmpInst>(U.getUser()) ||
- isa<PtrToIntInst>(U.getUser());
+ bool ShouldReplace = isa<PtrToAddrInst>(U.getUser()) ||
+ (!HasNonAddressBits &&
+ isa<ICmpInst, PtrToIntInst>(U.getUser()));
Changed |= ShouldReplace;
return ShouldReplace;
});
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index cb6ca72..7c364f8 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1539,7 +1539,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
IID == Intrinsic::experimental_vp_strided_load) {
Stride = VPI->getOperand(PtrOpNo + 1);
// Use the pointer alignment as the element alignment if the stride is a
- // mutiple of the pointer alignment. Otherwise, the element alignment
+ // multiple of the pointer alignment. Otherwise, the element alignment
// should be Align(1).
unsigned PointerAlign = Alignment.valueOrOne().value();
if (!isa<ConstantInt>(Stride) ||
@@ -2399,7 +2399,7 @@ void ModuleAddressSanitizer::instrumentGlobalsELF(
// Putting globals in a comdat changes the semantic and potentially cause
// false negative odr violations at link time. If odr indicators are used, we
- // keep the comdat sections, as link time odr violations will be dectected on
+ // keep the comdat sections, as link time odr violations will be detected on
// the odr indicator symbols.
bool UseComdatForGlobalsGC = UseOdrIndicator && !UniqueModuleId.empty();
@@ -3858,7 +3858,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
I->eraseFromParent();
}
- // Replace all uses of AddessReturnedByAlloca with NewAddressPtr.
+ // Replace all uses of AddressReturnedByAlloca with NewAddressPtr.
AI->replaceAllUsesWith(NewAddressPtr);
// We are done. Erase old alloca from parent.
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 72e8e50..0688bc7 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -359,7 +359,7 @@ class CHR {
unsigned Count = 0;
// Find out how many times region R is cloned. Note that if the parent
// of R is cloned, R is also cloned, but R's clone count is not updated
- // from the clone of the parent. We need to accumlate all the counts
+ // from the clone of the parent. We need to accumulate all the counts
// from the ancestors to get the clone count.
while (R) {
Count += DuplicationCount[R];
@@ -1513,7 +1513,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
BI->swapSuccessors();
// Don't need to swap this in terms of
// TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
- // mean whehter the branch is likely go into the if-then rather than
+ // mean whether the branch is likely go into the if-then rather than
// successor0/successor1 and because we can tell which edge is the then or
// the else one by comparing the destination to the region exit block.
continue;
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index cf87e35..1e5946a 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -83,7 +83,7 @@ static cl::opt<unsigned>
// ICP the candidate function even when only a declaration is present.
static cl::opt<bool> ICPAllowDecls(
"icp-allow-decls", cl::init(false), cl::Hidden,
- cl::desc("Promote the target candidate even when the defintion "
+ cl::desc("Promote the target candidate even when the definition "
" is not available"));
// ICP hot candidate functions only. When setting to false, non-cold functions
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 5e7548b..7795cce 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -139,7 +139,7 @@ cl::opt<bool> ConditionalCounterUpdate(
cl::init(false));
// If the option is not specified, the default behavior about whether
-// counter promotion is done depends on how instrumentaiton lowering
+// counter promotion is done depends on how instrumentation lowering
// pipeline is setup, i.e., the default value of true of this option
// does not mean the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
@@ -1052,7 +1052,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
assert(It != ProfileDataMap.end() && It->second.DataVar &&
- "value profiling detected in function with no counter incerement");
+ "value profiling detected in function with no counter increment");
GlobalVariable *DataVar = It->second.DataVar;
uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
index 3c0f185..05616d8 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
@@ -490,7 +490,7 @@ void createProfileFileNameVar(Module &M) {
}
}
-// Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible
+// Set MemprofHistogramFlag as a Global variable in IR. This makes it accessible
// to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
const StringRef VarName(MemProfHistogramFlagVar);
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 10b03bb..471c6ec 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3136,7 +3136,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// If we don't instrument it and it gets inlined,
/// our interceptor will not kick in and we will lose the memmove.
/// If we instrument the call here, but it does not get inlined,
- /// we will memove the shadow twice: which is bad in case
+ /// we will memmove the shadow twice: which is bad in case
/// of overlapping regions. So, we simply lower the intrinsic to a call.
///
/// Similar situation exists for memcpy and memset.
@@ -4775,7 +4775,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// _mm_round_ps / _mm_round_ps.
// Similar to maybeHandleSimpleNomemIntrinsic except
- // the second argument is guranteed to be a constant integer.
+ // the second argument is guaranteed to be a constant integer.
void handleRoundPdPsIntrinsic(IntrinsicInst &I) {
assert(I.getArgOperand(0)->getType() == I.getType());
assert(I.arg_size() == 2);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index f5b6686..5f87ed6 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -176,7 +176,7 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
assert(areAllBBsReachable(
F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M)
.getManager()) &&
- "Function has unreacheable basic blocks. The expectation was that "
+ "Function has unreachable basic blocks. The expectation was that "
"DCE was run before.");
auto It = FlattenedProfile.find(AssignGUIDPass::getGUID(F));
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index 0a358d4..de7c169 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -253,7 +253,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Value *RealContext = nullptr;
StructType *ThisContextType = nullptr;
- Value *TheRootFuctionData = nullptr;
+ Value *TheRootFunctionData = nullptr;
Value *ExpectedCalleeTLSAddr = nullptr;
Value *CallsiteInfoTLSAddr = nullptr;
const bool HasMusttail = [&F]() {
@@ -283,7 +283,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Guid = Builder.getInt64(
AssignGUIDPass::getGUID(cast<Function>(*Mark->getNameValue())));
// The type of the context of this function is now knowable since we have
- // NumCallsites and NumCounters. We delcare it here because it's more
+ // NumCallsites and NumCounters. We declare it here because it's more
// convenient - we have the Builder.
ThisContextType = StructType::get(
F.getContext(),
@@ -291,28 +291,27 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
ArrayType::get(Builder.getPtrTy(), NumCallsites)});
// Figure out which way we obtain the context object for this function -
// if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
- // former case, we also set TheRootFuctionData since we need to release it
- // at the end (plus it can be used to know if we have an entrypoint or a
- // regular function)
- // Don't set a name, they end up taking a lot of space and we don't need
- // them.
+ // former case, we also set TheRootFunctionData since we need to release
+ // it at the end (plus it can be used to know if we have an entrypoint or
+ // a regular function). Don't set a name, they end up taking a lot of
+ // space and we don't need them.
// Zero-initialize the FunctionData, except for functions that have
// musttail calls. There, we set the CtxRoot field to 1, which will be
// treated as a "can't be set as root".
- TheRootFuctionData = new GlobalVariable(
+ TheRootFunctionData = new GlobalVariable(
M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
HasMusttail ? CannotBeRootInitializer
: Constant::getNullValue(FunctionDataTy));
if (ContextRootSet.contains(&F)) {
Context = Builder.CreateCall(
- StartCtx, {TheRootFuctionData, Guid, Builder.getInt32(NumCounters),
+ StartCtx, {TheRootFunctionData, Guid, Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
ORE.emit(
[&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
} else {
- Context = Builder.CreateCall(GetCtx, {TheRootFuctionData, &F, Guid,
+ Context = Builder.CreateCall(GetCtx, {TheRootFunctionData, &F, Guid,
Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
ORE.emit([&] {
@@ -399,7 +398,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
} else if (!HasMusttail && isa<ReturnInst>(I)) {
// Remember to release the context if we are an entrypoint.
IRBuilder<> Builder(&I);
- Builder.CreateCall(ReleaseCtx, {TheRootFuctionData});
+ Builder.CreateCall(ReleaseCtx, {TheRootFunctionData});
ContextWasReleased = true;
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 120c4f6..71736cf 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1957,7 +1957,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
function_ref<LoopInfo *(Function &)> LookupLI,
PGOInstrumentationType InstrumentationType) {
- // For the context-sensitve instrumentation, we should have a separated pass
+ // For the context-sensitive instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (InstrumentationType == PGOInstrumentationType::FDO)
createIRLevelProfileFlagVar(M, InstrumentationType);
@@ -2248,7 +2248,7 @@ static bool annotateAllFunctions(
Func.populateCoverage();
continue;
}
- // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
+ // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
// it means the profile for the function is unrepresentative and this
// function is actually hot / warm. We will reset the function hot / cold
// attribute and drop all the profile counters.
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 0d48a35..fd0e9f1 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -355,7 +355,7 @@ static bool isVtableAccess(Instruction *I) {
}
// Do not instrument known races/"benign races" that come from compiler
-// instrumentatin. The user has no way of suppressing them.
+// instrumentation. The user has no way of suppressing them.
static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
// Peel off GEPs and BitCasts.
Addr = Addr->stripInBoundsOffsets();
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 9471ae3..78d4a57e 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -643,7 +643,7 @@ bool TypeSanitizer::instrumentWithShadowUpdate(
// doesn't match, then we call the runtime (which may yet determine that
// the mismatch is okay).
//
- // The checks generated below have the following strucutre.
+ // The checks generated below have the following structure.
//
// ; First we load the descriptor for the load from shadow memory and
// ; compare it against the type descriptor for the current access type.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index d831c27..c537be5c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7551,6 +7551,7 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
Value *Condition = SI->getCondition();
@@ -7573,12 +7574,6 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
if (SI->getNumCases() < 4)
return false;
- // We perform this optimization only for switches with
- // unreachable default case.
- // This assumtion will save us from checking if `Condition` is a power of two.
- if (!SI->defaultDestUnreachable())
- return false;
-
// Check that switch cases are powers of two.
SmallVector<uint64_t, 4> Values;
for (const auto &Case : SI->cases()) {
@@ -7598,6 +7593,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
Builder.SetInsertPoint(SI);
+ if (!SI->defaultDestUnreachable()) {
+ // Let non-power-of-two inputs jump to the default case, when the latter is
+ // reachable.
+ auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
+ auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
+
+ auto *OrigBB = SI->getParent();
+ auto *DefaultCaseBB = SI->getDefaultDest();
+ BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
+ auto It = OrigBB->getTerminator()->getIterator();
+ BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+ It->eraseFromParent();
+
+ addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
+ }
+
// Replace each case with its trailing zeros number.
for (auto &Case : SI->cases()) {
auto *OrigValue = Case.getCaseValue();
@@ -7953,7 +7966,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
Options.ConvertSwitchToLookupTable))
return requestResimplify();
- if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
+ if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (reduceSwitchRange(SI, Builder, DL, TTI))
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index 076cbf7..a505b42 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -1408,6 +1408,88 @@ define <4 x i16> @ext_via_i19(<4 x i16> %a) {
ret <4 x i16> %t6
}
+define <8 x i8> @srhadd_v8i8_trunc(<8 x i8> %s0, <8 x i8> %s1) {
+; CHECK-LABEL: srhadd_v8i8_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srhadd.8b v0, v0, v1
+; CHECK-NEXT: ret
+ %s0s = sext <8 x i8> %s0 to <8 x i16>
+ %s1s = sext <8 x i8> %s1 to <8 x i16>
+ %s = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0s, <8 x i16> %s1s)
+ %s2 = trunc <8 x i16> %s to <8 x i8>
+ ret <8 x i8> %s2
+}
+
+define <4 x i16> @srhadd_v4i16_trunc(<4 x i16> %s0, <4 x i16> %s1) {
+; CHECK-LABEL: srhadd_v4i16_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srhadd.4h v0, v0, v1
+; CHECK-NEXT: ret
+ %s0s = sext <4 x i16> %s0 to <4 x i32>
+ %s1s = sext <4 x i16> %s1 to <4 x i32>
+ %s = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %s0s, <4 x i32> %s1s)
+ %s2 = trunc <4 x i32> %s to <4 x i16>
+ ret <4 x i16> %s2
+}
+
+define <2 x i32> @srhadd_v2i32_trunc(<2 x i32> %s0, <2 x i32> %s1) {
+; CHECK-LABEL: srhadd_v2i32_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll.2d v0, v0, #0
+; CHECK-NEXT: sshll.2d v1, v1, #0
+; CHECK-NEXT: eor.16b v2, v0, v1
+; CHECK-NEXT: orr.16b v0, v0, v1
+; CHECK-NEXT: ushr.2d v1, v2, #1
+; CHECK-NEXT: sub.2d v0, v0, v1
+; CHECK-NEXT: xtn.2s v0, v0
+; CHECK-NEXT: ret
+ %s0s = sext <2 x i32> %s0 to <2 x i64>
+ %s1s = sext <2 x i32> %s1 to <2 x i64>
+ %s = call <2 x i64> @llvm.aarch64.neon.urhadd.v2i64(<2 x i64> %s0s, <2 x i64> %s1s)
+ %s2 = trunc <2 x i64> %s to <2 x i32>
+ ret <2 x i32> %s2
+}
+
+define <8 x i8> @urhadd_v8i8_trunc(<8 x i8> %s0, <8 x i8> %s1) {
+; CHECK-LABEL: urhadd_v8i8_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urhadd.8b v0, v0, v1
+; CHECK-NEXT: ret
+ %s0s = zext <8 x i8> %s0 to <8 x i16>
+ %s1s = zext <8 x i8> %s1 to <8 x i16>
+ %s = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0s, <8 x i16> %s1s)
+ %s2 = trunc <8 x i16> %s to <8 x i8>
+ ret <8 x i8> %s2
+}
+
+define <4 x i16> @urhadd_v4i16_trunc(<4 x i16> %s0, <4 x i16> %s1) {
+; CHECK-LABEL: urhadd_v4i16_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urhadd.4h v0, v0, v1
+; CHECK-NEXT: ret
+ %s0s = zext <4 x i16> %s0 to <4 x i32>
+ %s1s = zext <4 x i16> %s1 to <4 x i32>
+ %s = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %s0s, <4 x i32> %s1s)
+ %s2 = trunc <4 x i32> %s to <4 x i16>
+ ret <4 x i16> %s2
+}
+
+define <2 x i32> @urhadd_v2i32_trunc(<2 x i32> %s0, <2 x i32> %s1) {
+; CHECK-LABEL: urhadd_v2i32_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: uaddl.2d v0, v0, v1
+; CHECK-NEXT: dup.2d v1, x8
+; CHECK-NEXT: add.2d v0, v0, v1
+; CHECK-NEXT: shrn.2s v0, v0, #1
+; CHECK-NEXT: ret
+ %s0s = zext <2 x i32> %s0 to <2 x i64>
+ %s1s = zext <2 x i32> %s1 to <2 x i64>
+ %s = call <2 x i64> @llvm.aarch64.neon.srhadd.v2i64(<2 x i64> %s0s, <2 x i64> %s1s)
+ %s2 = trunc <2 x i64> %s to <2 x i32>
+ ret <2 x i32> %s2
+}
+
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
index 2d7a91f..985bcbd 100644
--- a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
@@ -1,22 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-; GFX12-LABEL: intrinsic_store_system_scope:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
-; GFX12-NEXT: s_endpgm
+; GFX1200-LABEL: intrinsic_store_system_scope:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: intrinsic_store_system_scope:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT: buffer_store_b32 v0, v[2:3], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: intrinsic_store_system_scope:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT: buffer_store_b32 v0, v[4:5], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT: s_endpgm
call void @llvm.amdgcn.struct.buffer.store.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 24)
ret void
}
define amdgpu_ps void @generic_store_volatile(i32 %val, ptr addrspace(1) %out) {
-; GFX12-LABEL: generic_store_volatile:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
-; GFX12-NEXT: s_wait_storecnt 0x0
-; GFX12-NEXT: s_endpgm
+; GFX1200-LABEL: generic_store_volatile:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
+; GFX1200-NEXT: s_wait_storecnt 0x0
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: generic_store_volatile:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT: global_store_b32 v[2:3], v0, off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: generic_store_volatile:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT: global_store_b32 v[4:5], v0, off scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
+; GFX1250-GISEL-NEXT: s_endpgm
store volatile i32 %val, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll b/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
index b57d90c..19b9b53 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
@@ -6,10 +6,9 @@ define void @shufflevector_reverse_v32i8(ptr %res, ptr %a) nounwind {
; CHECK-LABEL: shufflevector_reverse_v32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI0_0)
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
+; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 27
+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -23,11 +22,9 @@ define void @shufflevector_reverse_v16i16(ptr %res, ptr %a) nounwind {
; CHECK-LABEL: shufflevector_reverse_v16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
-; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 27
+; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 27
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
diff --git a/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll b/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
index 29f038a..a7b59e5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
@@ -6,9 +6,8 @@ define void @shufflevector_reverse_v16i8(ptr %res, ptr %a) nounwind {
; CHECK-LABEL: shufflevector_reverse_v16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 27
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -22,10 +21,9 @@ define void @shufflevector_reverse_v8i16(ptr %res, ptr %a) nounwind {
; CHECK-LABEL: shufflevector_reverse_v8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vst $vr1, $a0, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 27
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i16>, ptr %a
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_arr.ll b/llvm/test/CodeGen/NVPTX/mbarrier_arr.ll
new file mode 100644
index 0000000..c440caa
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_arr.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; === space_cta (addrspace 3) ===
+define void @test_mbarrier_arrive_scope_cta_space_cta(ptr addrspace(3) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_scope_cta_space_cta(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<6>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_arrive_scope_cta_space_cta_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_scope_cta_space_cta_param_1];
+; CHECK-PTX64-NEXT: mbarrier.arrive.shared.b64 %rd2, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive.expect_tx.shared.b64 %rd3, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.shared.b64 %rd4, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.expect_tx.shared.b64 %rd5, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_scope_cta_space_cta(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_scope_cta_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_arrive_scope_cta_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.shared.b64 %rd1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.expect_tx.shared.b64 %rd2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.shared.b64 %rd3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.expect_tx.shared.b64 %rd4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %r1 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r2 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ ret void
+}
+
+define void @test_mbarrier_arrive_scope_cluster_space_cta(ptr addrspace(3) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_scope_cluster_space_cta(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<6>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_arrive_scope_cluster_space_cta_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_scope_cluster_space_cta_param_1];
+; CHECK-PTX64-NEXT: mbarrier.arrive.release.cluster.shared.b64 %rd2, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive.expect_tx.release.cluster.shared.b64 %rd3, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.release.cluster.shared.b64 %rd4, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.expect_tx.release.cluster.shared.b64 %rd5, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_scope_cluster_space_cta(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_scope_cluster_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_arrive_scope_cluster_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.release.cluster.shared.b64 %rd1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.expect_tx.release.cluster.shared.b64 %rd2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.release.cluster.shared.b64 %rd3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.expect_tx.release.cluster.shared.b64 %rd4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %r1 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r2 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ ret void
+}
+
+; === space_cluster (addrspace 7) ===
+define void @test_mbarrier_arrive_scope_cta_space_cluster(ptr addrspace(7) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_scope_cta_space_cluster(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_arrive_scope_cta_space_cluster_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_scope_cta_space_cluster_param_1];
+; CHECK-PTX64-NEXT: mbarrier.arrive.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive.expect_tx.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.expect_tx.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_scope_cta_space_cluster(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_scope_cta_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_arrive_scope_cta_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.expect_tx.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.expect_tx.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ call void @llvm.nvvm.mbarrier.arrive.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ ret void
+}
+
+define void @test_mbarrier_arrive_scope_cluster_space_cluster(ptr addrspace(7) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_scope_cluster_space_cluster(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_arrive_scope_cluster_space_cluster_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_scope_cluster_space_cluster_param_1];
+; CHECK-PTX64-NEXT: mbarrier.arrive.release.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive.expect_tx.release.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.release.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.expect_tx.release.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_scope_cluster_space_cluster(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_scope_cluster_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_arrive_scope_cluster_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.release.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.expect_tx.release.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.release.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.expect_tx.release.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ call void @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ ret void
+}
+
+declare i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3), i32)
+
+declare i64 @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+declare void @llvm.nvvm.mbarrier.arrive.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cluster(ptr addrspace(7), i32)
+
+declare void @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cluster(ptr addrspace(7), i32)
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_arr_relaxed.ll b/llvm/test/CodeGen/NVPTX/mbarrier_arr_relaxed.ll
new file mode 100644
index 0000000..e4d2aa2
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_arr_relaxed.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; === space_cta (addrspace 3) ===
+define void @test_mbarrier_arrive_relaxed_scope_cta_space_cta(ptr addrspace(3) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_relaxed_scope_cta_space_cta(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<6>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_arrive_relaxed_scope_cta_space_cta_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cta_space_cta_param_1];
+; CHECK-PTX64-NEXT: mbarrier.arrive.relaxed.cta.shared.b64 %rd2, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive.expect_tx.relaxed.cta.shared.b64 %rd3, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.relaxed.cta.shared.b64 %rd4, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.expect_tx.relaxed.cta.shared.b64 %rd5, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_relaxed_scope_cta_space_cta(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cta_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_arrive_relaxed_scope_cta_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.relaxed.cta.shared.b64 %rd1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.expect_tx.relaxed.cta.shared.b64 %rd2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.relaxed.cta.shared.b64 %rd3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.expect_tx.relaxed.cta.shared.b64 %rd4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %r1 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r2 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ ret void
+}
+
+define void @test_mbarrier_arrive_relaxed_scope_cluster_space_cta(ptr addrspace(3) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_relaxed_scope_cluster_space_cta(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<6>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cta_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cta_param_1];
+; CHECK-PTX64-NEXT: mbarrier.arrive.relaxed.cluster.shared.b64 %rd2, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive.expect_tx.relaxed.cluster.shared.b64 %rd3, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.relaxed.cluster.shared.b64 %rd4, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.expect_tx.relaxed.cluster.shared.b64 %rd5, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_relaxed_scope_cluster_space_cta(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_arrive_relaxed_scope_cluster_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.relaxed.cluster.shared.b64 %rd1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.expect_tx.relaxed.cluster.shared.b64 %rd2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.relaxed.cluster.shared.b64 %rd3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.expect_tx.relaxed.cluster.shared.b64 %rd4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %r1 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r2 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ %r4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+ ret void
+}
+
+; === space_cluster (addrspace 7) ===
+define void @test_mbarrier_arrive_relaxed_scope_cta_space_cluster(ptr addrspace(7) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_relaxed_scope_cta_space_cluster(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_arrive_relaxed_scope_cta_space_cluster_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cta_space_cluster_param_1];
+; CHECK-PTX64-NEXT: mbarrier.arrive.relaxed.cta.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive.expect_tx.relaxed.cta.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.relaxed.cta.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.expect_tx.relaxed.cta.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_relaxed_scope_cta_space_cluster(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cta_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_arrive_relaxed_scope_cta_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.relaxed.cta.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.expect_tx.relaxed.cta.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.relaxed.cta.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.expect_tx.relaxed.cta.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ call void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ ret void
+}
+
+define void @test_mbarrier_arrive_relaxed_scope_cluster_space_cluster(ptr addrspace(7) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_relaxed_scope_cluster_space_cluster(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cluster_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cluster_param_1];
+; CHECK-PTX64-NEXT: mbarrier.arrive.relaxed.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive.expect_tx.relaxed.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.relaxed.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.arrive_drop.expect_tx.relaxed.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_relaxed_scope_cluster_space_cluster(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_arrive_relaxed_scope_cluster_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.relaxed.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive.expect_tx.relaxed.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.relaxed.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.arrive_drop.expect_tx.relaxed.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ call void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+ ret void
+}
+
+declare i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+
+declare i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+declare void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7), i32)
+
+declare void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7), i32)
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_tx.ll b/llvm/test/CodeGen/NVPTX/mbarrier_tx.ll
new file mode 100644
index 0000000..441ade3
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_tx.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+declare void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cta(ptr addrspace(3), i32)
+declare void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cta(ptr addrspace(3), i32)
+declare void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+declare void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cluster(ptr addrspace(7), i32)
+
+define void @test_mbarrier_tx_space_cta(ptr addrspace(3) %mbar, i32 %tx_count) {
+; CHECK-PTX64-LABEL: test_mbarrier_tx_space_cta(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_tx_space_cta_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_tx_space_cta_param_1];
+; CHECK-PTX64-NEXT: mbarrier.expect_tx.relaxed.cta.shared.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.expect_tx.relaxed.cluster.shared.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.complete_tx.relaxed.cta.shared.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.complete_tx.relaxed.cluster.shared.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_tx_space_cta(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_tx_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_tx_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.expect_tx.relaxed.cta.shared.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.expect_tx.relaxed.cluster.shared.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.complete_tx.relaxed.cta.shared.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.complete_tx.relaxed.cluster.shared.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ tail call void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx_count)
+ tail call void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx_count)
+
+ tail call void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx_count)
+ tail call void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx_count)
+
+ ret void
+}
+
+define void @test_mbarrier_tx_space_cluster(ptr addrspace(7) %mbar, i32 %tx_count) {
+; CHECK-PTX64-LABEL: test_mbarrier_tx_space_cluster(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [test_mbarrier_tx_space_cluster_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [test_mbarrier_tx_space_cluster_param_1];
+; CHECK-PTX64-NEXT: mbarrier.expect_tx.relaxed.cta.shared::cluster.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.expect_tx.relaxed.cluster.shared::cluster.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.complete_tx.relaxed.cta.shared::cluster.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.complete_tx.relaxed.cluster.shared::cluster.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_tx_space_cluster(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [test_mbarrier_tx_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [test_mbarrier_tx_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.expect_tx.relaxed.cta.shared::cluster.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.expect_tx.relaxed.cluster.shared::cluster.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.complete_tx.relaxed.cta.shared::cluster.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.complete_tx.relaxed.cluster.shared::cluster.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ tail call void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx_count)
+ tail call void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx_count)
+
+ tail call void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx_count)
+ tail call void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx_count)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx70.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx70.ll
new file mode 100644
index 0000000..5130ae2
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx70.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_80 && ptxas-isa-7.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70| %ptxas-verify -arch=sm_80 %}
+; RUN: %if ptxas-sm_80 && ptxas-isa-7.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 --nvptx-short-ptr| %ptxas-verify -arch=sm_80 %}
+
+declare i1 @llvm.nvvm.mbarrier.test.wait.scope.cta.space.cta(ptr addrspace(3), i64)
+
+define void @mbar_test_wait(ptr addrspace(3) %mbar, i64 %state) {
+; CHECK-PTX64-LABEL: mbar_test_wait(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_test_wait_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_test_wait_param_1];
+; CHECK-PTX64-NEXT: mbarrier.test_wait.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_test_wait(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<2>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_test_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_test_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.test_wait.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.test.wait.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx71.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx71.ll
new file mode 100644
index 0000000..9327e79
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx71.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_80 && ptxas-isa-7.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71| %ptxas-verify -arch=sm_80 %}
+; RUN: %if ptxas-sm_80 && ptxas-isa-7.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --nvptx-short-ptr| %ptxas-verify -arch=sm_80 %}
+
+; --- test.wait.parity ---
+declare i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cta.space.cta(ptr addrspace(3), i32)
+
+define void @mbar_test_wait(ptr addrspace(3) %mbar, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_test_wait(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<2>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_test_wait_param_0];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_test_wait_param_1];
+; CHECK-PTX64-NEXT: mbarrier.test_wait.parity.shared.b64 %p1, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_test_wait(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<2>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_test_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_test_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.test_wait.parity.shared.b64 %p1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx78.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx78.ll
new file mode 100644
index 0000000..9b19ad5
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx78.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; --- try.wait without timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.scope.cta.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cta.space.cta(ptr addrspace(3), i32)
+
+; --- try.wait with timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cta.space.cta(ptr addrspace(3), i64, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cta.space.cta(ptr addrspace(3), i32, i32)
+
+define void @mbar_try_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_try_wait(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<3>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_try_wait_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_try_wait_param_1];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_try_wait_param_2];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.parity.shared.b64 %p2, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_try_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_try_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_try_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.parity.shared.b64 %p2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state)
+ %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+ ret void
+}
+
+define void @mbar_try_wait_tl(ptr addrspace(3) %mbar, i64 %state, i32 %parity, i32 %tl) {
+; CHECK-PTX64-LABEL: mbar_try_wait_tl(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<3>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_try_wait_tl_param_1];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_try_wait_tl_param_3];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.shared.b64 %p1, [%rd1], %rd2, %r1;
+; CHECK-PTX64-NEXT: ld.param.b32 %r2, [mbar_try_wait_tl_param_2];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.parity.shared.b64 %p2, [%rd1], %r2, %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait_tl(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<4>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_try_wait_tl_param_1];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_try_wait_tl_param_3];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.shared.b64 %p1, [%r1], %rd1, %r2;
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [mbar_try_wait_tl_param_2];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.parity.shared.b64 %p2, [%r1], %r3, %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state, i32 %tl)
+ %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity, i32 %tl)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx80.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx80.ll
new file mode 100644
index 0000000..034953d
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx80.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; with sm-90 and ptx-80, we have support for cluster-scope
+
+; --- test.wait ---
+declare i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.mbarrier.test.wait.scope.cluster.space.cta(ptr addrspace(3), i64)
+
+; --- try.wait without timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.scope.cluster.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+; --- try.wait with timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cluster.space.cta(ptr addrspace(3), i64, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cluster.space.cta(ptr addrspace(3), i32, i32)
+
+define void @mbar_test_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_test_wait(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<3>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_test_wait_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_test_wait_param_1];
+; CHECK-PTX64-NEXT: mbarrier.test_wait.acquire.cluster.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_test_wait_param_2];
+; CHECK-PTX64-NEXT: mbarrier.test_wait.parity.acquire.cluster.shared.b64 %p2, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_test_wait(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_test_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_test_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.test_wait.acquire.cluster.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_test_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.test_wait.parity.acquire.cluster.shared.b64 %p2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.test.wait.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state)
+ %ret1 = call i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+ ret void
+}
+
+define void @mbar_try_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_try_wait(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<3>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_try_wait_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_try_wait_param_1];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.acquire.cluster.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_try_wait_param_2];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.parity.acquire.cluster.shared.b64 %p2, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_try_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_try_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.acquire.cluster.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_try_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.parity.acquire.cluster.shared.b64 %p2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state)
+ %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+ ret void
+}
+
+define void @mbar_try_wait_tl(ptr addrspace(3) %mbar, i64 %state, i32 %parity, i32 %tl) {
+; CHECK-PTX64-LABEL: mbar_try_wait_tl(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<3>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_try_wait_tl_param_1];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_try_wait_tl_param_3];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.acquire.cluster.shared.b64 %p1, [%rd1], %rd2, %r1;
+; CHECK-PTX64-NEXT: ld.param.b32 %r2, [mbar_try_wait_tl_param_2];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.parity.acquire.cluster.shared.b64 %p2, [%rd1], %r2, %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait_tl(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<4>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_try_wait_tl_param_1];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_try_wait_tl_param_3];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.acquire.cluster.shared.b64 %p1, [%r1], %rd1, %r2;
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [mbar_try_wait_tl_param_2];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.parity.acquire.cluster.shared.b64 %p2, [%r1], %r3, %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state, i32 %tl)
+ %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity, i32 %tl)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx86.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx86.ll
new file mode 100644
index 0000000..652634b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx86.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; --- test.wait ---
+declare i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cta.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3), i64)
+
+; --- try.wait without timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cta.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+; --- try.wait with timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cta.space.cta(ptr addrspace(3), i64, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3), i64, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cta.space.cta(ptr addrspace(3), i32, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32, i32)
+
+define void @mbar_test_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_test_wait(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<5>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_test_wait_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_test_wait_param_1];
+; CHECK-PTX64-NEXT: mbarrier.test_wait.relaxed.cta.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT: mbarrier.test_wait.relaxed.cluster.shared.b64 %p2, [%rd1], %rd2;
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_test_wait_param_2];
+; CHECK-PTX64-NEXT: mbarrier.test_wait.parity.relaxed.cta.shared.b64 %p3, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.test_wait.parity.relaxed.cluster.shared.b64 %p4, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_test_wait(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<5>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_test_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_test_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.test_wait.relaxed.cta.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.test_wait.relaxed.cluster.shared.b64 %p2, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_test_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.test_wait.parity.relaxed.cta.shared.b64 %p3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.test_wait.parity.relaxed.cluster.shared.b64 %p4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state)
+ %ret1 = call i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state)
+
+ %ret2 = call i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+ %ret3 = call i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+ ret void
+}
+
+define void @mbar_try_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_try_wait(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<5>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_try_wait_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_try_wait_param_1];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.relaxed.cta.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT: mbarrier.try_wait.relaxed.cluster.shared.b64 %p2, [%rd1], %rd2;
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_try_wait_param_2];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.parity.relaxed.cta.shared.b64 %p3, [%rd1], %r1;
+; CHECK-PTX64-NEXT: mbarrier.try_wait.parity.relaxed.cluster.shared.b64 %p4, [%rd1], %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<5>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_try_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_try_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.relaxed.cta.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.relaxed.cluster.shared.b64 %p2, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_try_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.parity.relaxed.cta.shared.b64 %p3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.parity.relaxed.cluster.shared.b64 %p4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state)
+ %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state)
+
+ %ret2 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+ %ret3 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+ ret void
+}
+
+define void @mbar_try_wait_tl(ptr addrspace(3) %mbar, i64 %state, i32 %parity, i32 %tl) {
+; CHECK-PTX64-LABEL: mbar_try_wait_tl(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .pred %p<5>;
+; CHECK-PTX64-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX64-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [mbar_try_wait_tl_param_1];
+; CHECK-PTX64-NEXT: ld.param.b32 %r1, [mbar_try_wait_tl_param_3];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.relaxed.cta.shared.b64 %p1, [%rd1], %rd2, %r1;
+; CHECK-PTX64-NEXT: mbarrier.try_wait.relaxed.cluster.shared.b64 %p2, [%rd1], %rd2, %r1;
+; CHECK-PTX64-NEXT: ld.param.b32 %r2, [mbar_try_wait_tl_param_2];
+; CHECK-PTX64-NEXT: mbarrier.try_wait.parity.relaxed.cta.shared.b64 %p3, [%rd1], %r2, %r1;
+; CHECK-PTX64-NEXT: mbarrier.try_wait.parity.relaxed.cluster.shared.b64 %p4, [%rd1], %r2, %r1;
+; CHECK-PTX64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait_tl(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .pred %p<5>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<4>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [mbar_try_wait_tl_param_1];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [mbar_try_wait_tl_param_3];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.relaxed.cta.shared.b64 %p1, [%r1], %rd1, %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.relaxed.cluster.shared.b64 %p2, [%r1], %rd1, %r2;
+; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [mbar_try_wait_tl_param_2];
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.parity.relaxed.cta.shared.b64 %p3, [%r1], %r3, %r2;
+; CHECK-PTX-SHARED32-NEXT: mbarrier.try_wait.parity.relaxed.cluster.shared.b64 %p4, [%r1], %r3, %r2;
+; CHECK-PTX-SHARED32-NEXT: ret;
+ %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state, i32 %tl)
+ %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state, i32 %tl)
+
+ %ret2 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity, i32 %tl)
+ %ret3 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity, i32 %tl)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
index 4ca2dc5db..eba9faa 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -234,8 +234,8 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
; CHECK-BE-NEXT: # %bb.4: # %bb37
; CHECK-BE-NEXT: bc 4, 4*cr5+lt, .LBB0_14
; CHECK-BE-NEXT: .LBB0_5: # %bb42
-; CHECK-BE-NEXT: addi r3, r3, global_1@toc@l
; CHECK-BE-NEXT: li r4, 0
+; CHECK-BE-NEXT: addi r3, r3, global_1@toc@l
; CHECK-BE-NEXT: cmpwi r28, 0
; CHECK-BE-NEXT: isel r3, r3, r4, 4*cr2+gt
; CHECK-BE-NEXT: crnot 4*cr2+lt, eq
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
index d506d20..e5d305f 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
@@ -1085,14 +1085,14 @@ define dso_local signext i32 @v16i8tov16i32_sign(<16 x i8> %a) local_unnamed_add
; PWR10BE-NEXT: addis r3, r2, .LCPI17_2@toc@ha
; PWR10BE-NEXT: vperm v3, v2, v2, v3
; PWR10BE-NEXT: addi r3, r3, .LCPI17_2@toc@l
-; PWR10BE-NEXT: vextsb2w v3, v3
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI17_3@toc@ha
+; PWR10BE-NEXT: vextsb2w v3, v3
; PWR10BE-NEXT: vperm v4, v2, v2, v4
; PWR10BE-NEXT: addi r3, r3, .LCPI17_3@toc@l
-; PWR10BE-NEXT: vextsb2w v4, v4
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: li r3, 0
+; PWR10BE-NEXT: vextsb2w v4, v4
; PWR10BE-NEXT: vperm v5, v2, v2, v5
; PWR10BE-NEXT: vadduwm v3, v4, v3
; PWR10BE-NEXT: vextsb2w v5, v5
@@ -1212,9 +1212,9 @@ define dso_local zeroext i32 @v16i8tov16i32_zero(<16 x i8> %a) local_unnamed_add
; PWR10BE-NEXT: addis r3, r2, .LCPI18_3@toc@ha
; PWR10BE-NEXT: vperm v5, v4, v2, v5
; PWR10BE-NEXT: addi r3, r3, .LCPI18_3@toc@l
-; PWR10BE-NEXT: vadduwm v3, v5, v3
; PWR10BE-NEXT: lxv v1, 0(r3)
; PWR10BE-NEXT: li r3, 0
+; PWR10BE-NEXT: vadduwm v3, v5, v3
; PWR10BE-NEXT: vperm v0, v4, v2, v0
; PWR10BE-NEXT: vperm v2, v4, v2, v1
; PWR10BE-NEXT: vadduwm v2, v2, v0
@@ -1568,41 +1568,41 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR10BE-NEXT: addis r3, r2, .LCPI23_0@toc@ha
; PWR10BE-NEXT: xxspltib v1, 255
; PWR10BE-NEXT: addi r3, r3, .LCPI23_0@toc@l
-; PWR10BE-NEXT: vsrq v1, v1, v1
; PWR10BE-NEXT: lxv v3, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_1@toc@ha
+; PWR10BE-NEXT: vsrq v1, v1, v1
; PWR10BE-NEXT: addi r3, r3, .LCPI23_1@toc@l
; PWR10BE-NEXT: vperm v1, v2, v2, v1
; PWR10BE-NEXT: lxv v4, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha
-; PWR10BE-NEXT: vextsb2d v1, v1
; PWR10BE-NEXT: vperm v3, v2, v2, v3
+; PWR10BE-NEXT: vextsb2d v1, v1
; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l
-; PWR10BE-NEXT: vextsb2d v3, v3
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_3@toc@ha
+; PWR10BE-NEXT: vextsb2d v3, v3
; PWR10BE-NEXT: vperm v4, v2, v2, v4
; PWR10BE-NEXT: addi r3, r3, .LCPI23_3@toc@l
-; PWR10BE-NEXT: vextsb2d v4, v4
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_4@toc@ha
+; PWR10BE-NEXT: vextsb2d v4, v4
; PWR10BE-NEXT: vperm v5, v2, v2, v5
; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l
-; PWR10BE-NEXT: vextsb2d v5, v5
; PWR10BE-NEXT: lxv v6, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha
+; PWR10BE-NEXT: vextsb2d v5, v5
; PWR10BE-NEXT: vperm v0, v2, v2, v0
; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l
-; PWR10BE-NEXT: vextsb2d v0, v0
; PWR10BE-NEXT: lxv v7, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha
+; PWR10BE-NEXT: vextsb2d v0, v0
; PWR10BE-NEXT: vperm v6, v2, v2, v6
+; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l
; PWR10BE-NEXT: vaddudm v5, v0, v5
; PWR10BE-NEXT: vaddudm v3, v4, v3
; PWR10BE-NEXT: vaddudm v3, v3, v5
-; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l
-; PWR10BE-NEXT: vextsb2d v6, v6
; PWR10BE-NEXT: lxv v8, 0(r3)
+; PWR10BE-NEXT: vextsb2d v6, v6
; PWR10BE-NEXT: vperm v7, v2, v2, v7
; PWR10BE-NEXT: vextsb2d v7, v7
; PWR10BE-NEXT: vperm v2, v2, v2, v8
diff --git a/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll b/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
index faa119c..5f0682a 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
define void @undef_2phi(ptr%buf) {
; CHECK-LABEL: @undef_2phi(
diff --git a/llvm/test/CodeGen/X86/AMX/amx-combine.ll b/llvm/test/CodeGen/X86/AMX/amx-combine.ll
index 07f489c..72e072dd 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-combine.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-combine.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
define void @combine_store(ptr%p) {
; CHECK-LABEL: @combine_store(
diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
index 6c536f1..4ac406c 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -lower-amx-type -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -x86-lower-amx-type -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -passes=x86-lower-amx-type -S | FileCheck %s
@buf = dso_local global [1024 x i8] zeroinitializer, align 16
@buf2 = dso_local global [1024 x i8] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/AMX/amx-type.ll b/llvm/test/CodeGen/X86/AMX/amx-type.ll
index 1d9af2b..294195a 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-type.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-type.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
%struct.__tile_str = type { i16, i16, <256 x i32> }
diff --git a/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
index b70668f..cdce783 100644
--- a/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
define void @combine_amx_cast_inside_bb() {
; CHECK-LABEL: @combine_amx_cast_inside_bb(
diff --git a/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
index 3a5b4245..0b419bb 100644
--- a/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
%struct.__tile_str = type { i16, i16, <256 x i32> }
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
index 52641c6..3549875 100644
--- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
+++ b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
- ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+ ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+ ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
@buf = dso_local global [2048 x i8] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
index 346d46b..96966264 100644
--- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
+++ b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
@buf = dso_local global [2048 x i8] zeroinitializer, align 16
@buf2 = dso_local global [2048 x i8] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/trunc-srl-load.ll b/llvm/test/CodeGen/X86/trunc-srl-load.ll
new file mode 100644
index 0000000..4dae143
--- /dev/null
+++ b/llvm/test/CodeGen/X86/trunc-srl-load.ll
@@ -0,0 +1,1672 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
+
+; Tests showing for the analysis of non-constant shift amounts to improve load address math
+
+; Alignment of shift amounts should allow sub-integer loads.
+
+define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub64_16:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl 4(%eax), %esi
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: andb $16, %cl
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: shrl %cl, %eax
+; X86-NEXT: shrdl %cl, %esi, %edx
+; X86-NEXT: testb $32, %ch
+; X86-NEXT: jne .LBB0_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: .LBB0_2:
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub64_16:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: andb $48, %cl
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shrq %cl, %rax
+; SSE-NEXT: # kill: def $ax killed $ax killed $rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extractSub64_16:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX-NEXT: andb $48, %sil
+; AVX-NEXT: shrxq %rsi, (%rdi), %rax
+; AVX-NEXT: # kill: def $ax killed $ax killed $rax
+; AVX-NEXT: retq
+ %idx_bounds = and i32 %idx, 63
+ %idx_align = and i32 %idx_bounds, -16
+ %sh = zext nneg i32 %idx_align to i64
+ %ld = load i64, ptr %word, align 8
+ %sub = lshr i64 %ld, %sh
+ %res = trunc i64 %sub to i16
+ ret i16 %res
+}
+
+define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_16:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: movzbl 12(%ebp), %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl 4(%ecx), %esi
+; X86-NEXT: movl 8(%ecx), %edi
+; X86-NEXT: movl 12(%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: andb $16, %cl
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: movzbl %al, %edx
+; X86-NEXT: movl (%esp,%edx), %eax
+; X86-NEXT: movl 4(%esp,%edx), %edx
+; X86-NEXT: shrdl %cl, %edx, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub128_16:
+; SSE: # %bb.0:
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rdx
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andb $48, %cl
+; SSE-NEXT: movq %rdx, %rdi
+; SSE-NEXT: shrq %cl, %rdi
+; SSE-NEXT: shrdq %cl, %rdx, %rax
+; SSE-NEXT: testb $64, %sil
+; SSE-NEXT: cmovneq %rdi, %rax
+; SSE-NEXT: # kill: def $ax killed $ax killed $rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extractSub128_16:
+; AVX: # %bb.0:
+; AVX-NEXT: movq (%rdi), %rdx
+; AVX-NEXT: movq 8(%rdi), %rax
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: andb $48, %cl
+; AVX-NEXT: shrdq %cl, %rax, %rdx
+; AVX-NEXT: shrxq %rcx, %rax, %rax
+; AVX-NEXT: testb $64, %sil
+; AVX-NEXT: cmoveq %rdx, %rax
+; AVX-NEXT: # kill: def $ax killed $ax killed $rax
+; AVX-NEXT: retq
+ %idx_bounds = and i32 %idx, 127
+ %idx_align = and i32 %idx_bounds, -16
+ %sh = zext nneg i32 %idx_align to i128
+ %ld = load i128, ptr %word, align 8
+ %sub = lshr i128 %ld, %sh
+ %res = trunc i128 %sub to i16
+ ret i16 %res
+}
+
+define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: movzbl 12(%ebp), %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl 4(%ecx), %esi
+; X86-NEXT: movl 8(%ecx), %edi
+; X86-NEXT: movl 12(%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: andb $96, %al
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: movl (%esp,%eax), %eax
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub128_32:
+; SSE: # %bb.0:
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rdx
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andb $32, %cl
+; SSE-NEXT: movq %rdx, %rdi
+; SSE-NEXT: shrq %cl, %rdi
+; SSE-NEXT: shrdq %cl, %rdx, %rax
+; SSE-NEXT: testb $64, %sil
+; SSE-NEXT: cmovneq %rdi, %rax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extractSub128_32:
+; AVX: # %bb.0:
+; AVX-NEXT: movq (%rdi), %rdx
+; AVX-NEXT: movq 8(%rdi), %rax
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: andb $32, %cl
+; AVX-NEXT: shrdq %cl, %rax, %rdx
+; AVX-NEXT: shrxq %rcx, %rax, %rax
+; AVX-NEXT: testb $64, %sil
+; AVX-NEXT: cmoveq %rdx, %rax
+; AVX-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX-NEXT: retq
+ %idx_bounds = and i32 %idx, 127
+ %idx_align = and i32 %idx_bounds, -32
+ %sh = zext nneg i32 %idx_align to i128
+ %ld = load i128, ptr %word, align 8
+ %sub = lshr i128 %ld, %sh
+ %res = trunc i128 %sub to i32
+ ret i32 %res
+}
+
+define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: movzbl 12(%ebp), %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl 4(%ecx), %esi
+; X86-NEXT: movl 8(%ecx), %edi
+; X86-NEXT: movl 12(%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: andb $64, %al
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: movzbl %al, %ecx
+; X86-NEXT: movl (%esp,%ecx), %eax
+; X86-NEXT: movl 4(%esp,%ecx), %edx
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: extractSub128_64:
+; X64: # %bb.0:
+; X64-NEXT: testb $64, %sil
+; X64-NEXT: je .LBB3_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: movq 8(%rdi), %rax
+; X64-NEXT: retq
+; X64-NEXT: .LBB3_1:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: retq
+ %idx_bounds = and i32 %idx, 127
+ %idx_align = and i32 %idx_bounds, -64
+ %sh = zext nneg i32 %idx_align to i128
+ %ld = load i128, ptr %word, align 8
+ %sub = lshr i128 %ld, %sh
+ %res = trunc i128 %sub to i64
+ ret i64 %res
+}
+
+define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_8:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $192, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%eax), %ebx
+; X86-NEXT: movl 44(%eax), %edi
+; X86-NEXT: movl 48(%eax), %esi
+; X86-NEXT: movl 52(%eax), %edx
+; X86-NEXT: movl 56(%eax), %ecx
+; X86-NEXT: movl 60(%eax), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: andl $24, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: andl $60, %edx
+; X86-NEXT: movl 48(%esp,%edx), %eax
+; X86-NEXT: movl 52(%esp,%edx), %edx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shrdl %cl, %edx, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub512_8:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE-NEXT: movups (%rdi), %xmm0
+; SSE-NEXT: movups 16(%rdi), %xmm1
+; SSE-NEXT: movups 32(%rdi), %xmm2
+; SSE-NEXT: movups 48(%rdi), %xmm3
+; SSE-NEXT: xorps %xmm4, %xmm4
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andl $56, %ecx
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $56, %esi
+; SSE-NEXT: movq -128(%rsp,%rsi), %rdx
+; SSE-NEXT: shrq %cl, %rdx
+; SSE-NEXT: movl -120(%rsp,%rsi), %eax
+; SSE-NEXT: addl %eax, %eax
+; SSE-NEXT: notl %ecx
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: orl %edx, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $rax
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: extractSub512_8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rax
+; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT: vmovups (%rdi), %ymm0
+; AVX2-NEXT: vmovups 32(%rdi), %ymm1
+; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: andl $56, %ecx
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: andl $56, %esi
+; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax
+; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
+; AVX2-NEXT: notl %ecx
+; AVX2-NEXT: movl -120(%rsp,%rsi), %edx
+; AVX2-NEXT: addl %edx, %edx
+; AVX2-NEXT: shlxq %rcx, %rdx, %rcx
+; AVX2-NEXT: orl %ecx, %eax
+; AVX2-NEXT: # kill: def $al killed $al killed $rax
+; AVX2-NEXT: popq %rcx
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: extractSub512_8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: vmovups 32(%rdi), %ymm1
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: andl $56, %ecx
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: andl $56, %esi
+; AVX512-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax
+; AVX512-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
+; AVX512-NEXT: notl %ecx
+; AVX512-NEXT: movl -120(%rsp,%rsi), %edx
+; AVX512-NEXT: addl %edx, %edx
+; AVX512-NEXT: shlxq %rcx, %rdx, %rcx
+; AVX512-NEXT: orl %ecx, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $rax
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %idx_bounds = and i32 %idx, 511
+ %idx_align = and i32 %idx_bounds, -8
+ %ld = load i512, ptr %word, align 8
+ %sh = zext nneg i32 %idx_align to i512
+ %sub = lshr i512 %ld, %sh
+ %res = trunc i512 %sub to i8
+ ret i8 %res
+}
+
+define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $192, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%eax), %ebx
+; X86-NEXT: movl 44(%eax), %edi
+; X86-NEXT: movl 48(%eax), %esi
+; X86-NEXT: movl 52(%eax), %edx
+; X86-NEXT: movl 56(%eax), %ecx
+; X86-NEXT: movl 60(%eax), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: andl $56, %ecx
+; X86-NEXT: movl 48(%esp,%ecx), %eax
+; X86-NEXT: movl 52(%esp,%ecx), %edx
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub512_64:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE-NEXT: movups (%rdi), %xmm0
+; SSE-NEXT: movups 16(%rdi), %xmm1
+; SSE-NEXT: movups 32(%rdi), %xmm2
+; SSE-NEXT: movups 48(%rdi), %xmm3
+; SSE-NEXT: xorps %xmm4, %xmm4
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $56, %esi
+; SSE-NEXT: movq -128(%rsp,%rsi), %rax
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: extractSub512_64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rax
+; AVX2-NEXT: vmovups (%rdi), %ymm0
+; AVX2-NEXT: vmovups 32(%rdi), %ymm1
+; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: andl $56, %esi
+; AVX2-NEXT: movq -128(%rsp,%rsi), %rax
+; AVX2-NEXT: popq %rcx
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: extractSub512_64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: vmovups 32(%rdi), %ymm1
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: andl $56, %esi
+; AVX512-NEXT: movq -128(%rsp,%rsi), %rax
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %idx_bounds = and i32 %idx, 511
+ %idx_align = and i32 %idx_bounds, -64
+ %sh = zext nneg i32 %idx_align to i512
+ %ld = load i512, ptr %word, align 8
+ %sub = lshr i512 %ld, %sh
+ %res = trunc i512 %sub to i64
+ ret i64 %res
+}
+
+define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $192, %esp
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%eax), %ebx
+; X86-NEXT: movl 44(%eax), %edi
+; X86-NEXT: movl 48(%eax), %esi
+; X86-NEXT: movl 52(%eax), %edx
+; X86-NEXT: movl 56(%eax), %ecx
+; X86-NEXT: movl 60(%eax), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: shrl $3, %edi
+; X86-NEXT: andl $48, %edi
+; X86-NEXT: movl 48(%esp,%edi), %ecx
+; X86-NEXT: movl 52(%esp,%edi), %edx
+; X86-NEXT: movl 56(%esp,%edi), %esi
+; X86-NEXT: movl 60(%esp,%edi), %edi
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
+; SSE-LABEL: extractSub512_128:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE-NEXT: movups (%rdi), %xmm0
+; SSE-NEXT: movups 16(%rdi), %xmm1
+; SSE-NEXT: movups 32(%rdi), %xmm2
+; SSE-NEXT: movups 48(%rdi), %xmm3
+; SSE-NEXT: xorps %xmm4, %xmm4
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $48, %esi
+; SSE-NEXT: movq -128(%rsp,%rsi), %rax
+; SSE-NEXT: movq -120(%rsp,%rsi), %rdx
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extractSub512_128:
+; AVX: # %bb.0:
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX-NEXT: vmovups (%rdi), %ymm0
+; AVX-NEXT: vmovups 32(%rdi), %ymm1
+; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: shrl $3, %esi
+; AVX-NEXT: andl $48, %esi
+; AVX-NEXT: movq -128(%rsp,%rsi), %rax
+; AVX-NEXT: movq -120(%rsp,%rsi), %rdx
+; AVX-NEXT: popq %rcx
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %idx_bounds = and i32 %idx, 511
+ %idx_align = and i32 %idx_bounds, -128
+ %sh = zext nneg i32 %idx_align to i512
+ %ld = load i512, ptr %word, align 8
+ %sub = lshr i512 %ld, %sh
+ %res = trunc i512 %sub to i128
+ ret i128 %res
+}
+
+define i64 @extractSub4096_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub4096_64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $1536, %esp # imm = 0x600
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 4(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 52(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 56(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 64(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 68(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 72(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 76(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 80(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 84(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 88(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 92(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 96(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 100(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 104(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 108(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 112(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 116(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 120(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 124(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 128(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 132(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 136(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 140(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 144(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 148(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 152(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 156(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 160(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 164(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 168(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 172(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 176(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 180(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 184(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 188(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 192(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 196(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 200(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 204(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 208(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 212(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 216(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 220(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 224(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 228(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 232(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 236(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 240(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 244(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 248(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 252(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 256(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 260(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 264(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 268(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 272(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 276(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 280(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 284(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 288(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 292(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 296(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 300(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 304(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 308(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 312(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 316(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 320(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 324(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 328(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 332(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 336(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 340(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 344(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 348(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 352(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 356(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 360(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 364(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 368(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 372(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 376(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 380(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 384(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 388(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 392(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 396(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 400(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 404(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 408(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 412(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 416(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 420(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 424(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 428(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 432(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 436(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 440(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 444(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 448(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 452(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 456(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 460(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 464(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 468(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 472(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 476(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 480(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 484(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 488(%eax), %ebx
+; X86-NEXT: movl 492(%eax), %edi
+; X86-NEXT: movl 496(%eax), %esi
+; X86-NEXT: movl 500(%eax), %edx
+; X86-NEXT: movl 504(%eax), %ecx
+; X86-NEXT: movl 508(%eax), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $4032, %ecx # imm = 0xFC0
+; X86-NEXT: andl 12(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: movl 496(%esp,%ecx), %eax
+; X86-NEXT: movl 500(%esp,%ecx), %edx
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub4096_64:
+; SSE: # %bb.0:
+; SSE-NEXT: subq $1176, %rsp # imm = 0x498
+; SSE-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE-NEXT: movups (%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 16(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 32(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 48(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 64(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 80(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 96(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 112(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 128(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: movups 144(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 160(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 176(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 192(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 208(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 224(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 240(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 256(%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movups 272(%rdi), %xmm15
+; SSE-NEXT: movups 288(%rdi), %xmm14
+; SSE-NEXT: movups 304(%rdi), %xmm13
+; SSE-NEXT: movups 320(%rdi), %xmm12
+; SSE-NEXT: movups 336(%rdi), %xmm11
+; SSE-NEXT: movups 352(%rdi), %xmm10
+; SSE-NEXT: movups 368(%rdi), %xmm9
+; SSE-NEXT: movups 384(%rdi), %xmm8
+; SSE-NEXT: movups 400(%rdi), %xmm7
+; SSE-NEXT: movups 416(%rdi), %xmm6
+; SSE-NEXT: movups 432(%rdi), %xmm5
+; SSE-NEXT: movups 448(%rdi), %xmm4
+; SSE-NEXT: movups 464(%rdi), %xmm3
+; SSE-NEXT: movups 480(%rdi), %xmm2
+; SSE-NEXT: movups 496(%rdi), %xmm1
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm3, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm5, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm8, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm9, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm10, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm11, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm12, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm13, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm14, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm15, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: andl $4032, %esi # imm = 0xFC0
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: movq 144(%rsp,%rsi), %rax
+; SSE-NEXT: addq $1176, %rsp # imm = 0x498
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: extractSub4096_64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: subq $936, %rsp # imm = 0x3A8
+; AVX2-NEXT: vmovups (%rdi), %ymm0
+; AVX2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX2-NEXT: vmovups 32(%rdi), %ymm1
+; AVX2-NEXT: vmovups 64(%rdi), %ymm2
+; AVX2-NEXT: vmovups 96(%rdi), %ymm3
+; AVX2-NEXT: vmovups 128(%rdi), %ymm4
+; AVX2-NEXT: vmovups 160(%rdi), %ymm5
+; AVX2-NEXT: vmovups 192(%rdi), %ymm6
+; AVX2-NEXT: vmovups 224(%rdi), %ymm7
+; AVX2-NEXT: vmovups 256(%rdi), %ymm8
+; AVX2-NEXT: vmovups 288(%rdi), %ymm9
+; AVX2-NEXT: vmovups 320(%rdi), %ymm10
+; AVX2-NEXT: vmovups 352(%rdi), %ymm11
+; AVX2-NEXT: vmovups 384(%rdi), %ymm12
+; AVX2-NEXT: vmovups 416(%rdi), %ymm13
+; AVX2-NEXT: vmovups 448(%rdi), %ymm14
+; AVX2-NEXT: vmovups 480(%rdi), %ymm15
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm15, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm14, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm13, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm12, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm11, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm10, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm9, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm8, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm4, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm3, (%rsp)
+; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT: andl $4032, %esi # imm = 0xFC0
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: movq -96(%rsp,%rsi), %rax
+; AVX2-NEXT: addq $936, %rsp # imm = 0x3A8
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: extractSub4096_64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $904, %rsp # imm = 0x388
+; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: vmovups 32(%rdi), %ymm1
+; AVX512-NEXT: vmovups 64(%rdi), %ymm2
+; AVX512-NEXT: vmovups 96(%rdi), %ymm3
+; AVX512-NEXT: vmovups 128(%rdi), %ymm4
+; AVX512-NEXT: vmovups 160(%rdi), %ymm5
+; AVX512-NEXT: vmovups 192(%rdi), %ymm6
+; AVX512-NEXT: vmovups 224(%rdi), %ymm7
+; AVX512-NEXT: vmovups 256(%rdi), %ymm8
+; AVX512-NEXT: vmovups 288(%rdi), %ymm9
+; AVX512-NEXT: vmovups 320(%rdi), %ymm10
+; AVX512-NEXT: vmovups 352(%rdi), %ymm11
+; AVX512-NEXT: vmovups 384(%rdi), %ymm12
+; AVX512-NEXT: vmovups 416(%rdi), %ymm13
+; AVX512-NEXT: andl $4032, %esi # imm = 0xFC0
+; AVX512-NEXT: vmovups 448(%rdi), %ymm14
+; AVX512-NEXT: vmovups 480(%rdi), %ymm15
+; AVX512-NEXT: vxorps %xmm16, %xmm16, %xmm16
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm15, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm14, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm13, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm12, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm11, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm10, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm9, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm8, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm4, (%rsp)
+; AVX512-NEXT: vmovups %ymm3, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: movq -128(%rsp,%rsi), %rax
+; AVX512-NEXT: addq $904, %rsp # imm = 0x388
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %idx_bounds = and i32 %idx, 4095
+ %idx_align = and i32 %idx_bounds, -64
+ %sh = zext nneg i32 %idx_align to i4096
+ %ld = load i4096, ptr %word, align 8
+ %sub = lshr i4096 %ld, %sh
+ %res = trunc i4096 %sub to i64
+ ret i64 %res
+}
diff --git a/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
index 35b7b04..5affe55 100644
--- a/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
+++ b/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
@@ -60,7 +60,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind optsize readnone "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind optsize readnone "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll b/llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
index d21f1bf..4f9e67c 100644
--- a/llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
+++ b/llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
@@ -27,7 +27,7 @@ entry:
ret void, !dbg !19
}
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+neon,+vfp3" "use-soft-float"="false" }
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!8, !9, !10, !11}
diff --git a/llvm/test/DebugInfo/BPF/extern-void.ll b/llvm/test/DebugInfo/BPF/extern-void.ll
index 8fc8089..fb6700d1 100644
--- a/llvm/test/DebugInfo/BPF/extern-void.ll
+++ b/llvm/test/DebugInfo/BPF/extern-void.ll
@@ -42,7 +42,7 @@ entry:
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/array-odr-violation.ll b/llvm/test/DebugInfo/COFF/array-odr-violation.ll
index 0283515..f24c7d7 100644
--- a/llvm/test/DebugInfo/COFF/array-odr-violation.ll
+++ b/llvm/test/DebugInfo/COFF/array-odr-violation.ll
@@ -57,7 +57,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { noinline nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!2, !11}
diff --git a/llvm/test/DebugInfo/COFF/asan-module-ctor.ll b/llvm/test/DebugInfo/COFF/asan-module-ctor.ll
index 49c30e9..bc991a4 100644
--- a/llvm/test/DebugInfo/COFF/asan-module-ctor.ll
+++ b/llvm/test/DebugInfo/COFF/asan-module-ctor.ll
@@ -76,7 +76,7 @@ declare void @__asan_register_globals(i32, i32)
declare void @__asan_unregister_globals(i32, i32)
-attributes #0 = { nounwind sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/COFF/asm.ll b/llvm/test/DebugInfo/COFF/asm.ll
index cf440bd..424ea96 100644
--- a/llvm/test/DebugInfo/COFF/asm.ll
+++ b/llvm/test/DebugInfo/COFF/asm.ll
@@ -145,8 +145,8 @@ entry:
declare void @g() #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/class-options-common.ll b/llvm/test/DebugInfo/COFF/class-options-common.ll
index 70071461..014c86c 100644
--- a/llvm/test/DebugInfo/COFF/class-options-common.ll
+++ b/llvm/test/DebugInfo/COFF/class-options-common.ll
@@ -840,7 +840,7 @@ entry:
ret void, !dbg !173
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/comdat.ll b/llvm/test/DebugInfo/COFF/comdat.ll
index 841630c..8c97afb 100644
--- a/llvm/test/DebugInfo/COFF/comdat.ll
+++ b/llvm/test/DebugInfo/COFF/comdat.ll
@@ -129,11 +129,11 @@ declare i32 @__C_specific_handler(...)
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #5
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { inlinehint noinline nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #2 = { inlinehint noinline nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #5 = { nounwind readnone }
attributes #6 = { noinline }
attributes #7 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/cpp-mangling.ll b/llvm/test/DebugInfo/COFF/cpp-mangling.ll
index 994f575..257c97a 100644
--- a/llvm/test/DebugInfo/COFF/cpp-mangling.ll
+++ b/llvm/test/DebugInfo/COFF/cpp-mangling.ll
@@ -72,7 +72,7 @@ entry:
ret void, !dbg !32
}
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/defer-complete-type.ll b/llvm/test/DebugInfo/COFF/defer-complete-type.ll
index 012b234..6086092 100644
--- a/llvm/test/DebugInfo/COFF/defer-complete-type.ll
+++ b/llvm/test/DebugInfo/COFF/defer-complete-type.ll
@@ -172,7 +172,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/enum-co.ll b/llvm/test/DebugInfo/COFF/enum-co.ll
index b5ce797..a334b35 100644
--- a/llvm/test/DebugInfo/COFF/enum-co.ll
+++ b/llvm/test/DebugInfo/COFF/enum-co.ll
@@ -125,7 +125,7 @@ entry:
ret void, !dbg !46
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/fpo-argsize.ll b/llvm/test/DebugInfo/COFF/fpo-argsize.ll
index 82eedd9..e4af66f 100644
--- a/llvm/test/DebugInfo/COFF/fpo-argsize.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-argsize.ll
@@ -285,7 +285,7 @@ entry:
ret void, !dbg !159
}
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/fpo-csrs.ll b/llvm/test/DebugInfo/COFF/fpo-csrs.ll
index c5b1dc9..8e99e1f 100644
--- a/llvm/test/DebugInfo/COFF/fpo-csrs.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-csrs.ll
@@ -581,8 +581,8 @@ entry:
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/fpo-funclet.ll b/llvm/test/DebugInfo/COFF/fpo-funclet.ll
index 302c4a2..260bdd2 100644
--- a/llvm/test/DebugInfo/COFF/fpo-funclet.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-funclet.ll
@@ -58,8 +58,8 @@ declare void @"\01?g@@YAXXZ"() local_unnamed_addr #1
declare i32 @__CxxFrameHandler3(...)
-attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll b/llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll
index 5bd19a0..d6f45b1 100644
--- a/llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll
@@ -76,9 +76,9 @@ declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #3
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
-attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #3 = { nounwind readnone speculatable }
attributes #4 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll b/llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll
index 5f1d6a5..15626a8 100644
--- a/llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll
@@ -191,10 +191,10 @@ declare dso_local void @usevals(ptr, ptr, ptr) local_unnamed_addr #3
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #4 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
index 9c6fb8a..4b76ec6 100644
--- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
@@ -126,8 +126,8 @@ declare i32 @doSomething(ptr) local_unnamed_addr #1
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll b/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
index d0d7249..9fcb591 100644
--- a/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
@@ -74,10 +74,10 @@ declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #4 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/frameproc-flags.ll b/llvm/test/DebugInfo/COFF/frameproc-flags.ll
index 9054189..774e586 100644
--- a/llvm/test/DebugInfo/COFF/frameproc-flags.ll
+++ b/llvm/test/DebugInfo/COFF/frameproc-flags.ll
@@ -272,18 +272,18 @@ declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) #2
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #4 = { returns_twice }
-attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #6 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #7 = { inlinehint nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #8 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #6 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #7 = { inlinehint nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #8 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #9 = { nounwind readnone }
attributes #10 = { nounwind }
-attributes #11 = { naked noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #11 = { naked noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #12 = { noinline }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/function-options.ll b/llvm/test/DebugInfo/COFF/function-options.ll
index c54c4d8..1cac141 100644
--- a/llvm/test/DebugInfo/COFF/function-options.ll
+++ b/llvm/test/DebugInfo/COFF/function-options.ll
@@ -625,9 +625,9 @@ entry:
ret i32 0, !dbg !118
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/DebugInfo/COFF/global-constants.ll b/llvm/test/DebugInfo/COFF/global-constants.ll
index c0e6afb..28fe071 100644
--- a/llvm/test/DebugInfo/COFF/global-constants.ll
+++ b/llvm/test/DebugInfo/COFF/global-constants.ll
@@ -91,8 +91,8 @@ entry:
declare dso_local void @"?useConst@@YAXH@Z"(i32) #1
-attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!26, !27, !28, !29}
diff --git a/llvm/test/DebugInfo/COFF/global_visibility.ll b/llvm/test/DebugInfo/COFF/global_visibility.ll
index 8da374d..499690a85 100644
--- a/llvm/test/DebugInfo/COFF/global_visibility.ll
+++ b/llvm/test/DebugInfo/COFF/global_visibility.ll
@@ -195,8 +195,8 @@ entry:
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
-attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { nounwind readnone speculatable willreturn }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/globals.ll b/llvm/test/DebugInfo/COFF/globals.ll
index 2724b99..9df4435 100644
--- a/llvm/test/DebugInfo/COFF/globals.ll
+++ b/llvm/test/DebugInfo/COFF/globals.ll
@@ -402,7 +402,7 @@ entry:
ret ptr %this1, !dbg !83
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/inheritance.ll b/llvm/test/DebugInfo/COFF/inheritance.ll
index e31d6ef..ae2f7f7 100644
--- a/llvm/test/DebugInfo/COFF/inheritance.ll
+++ b/llvm/test/DebugInfo/COFF/inheritance.ll
@@ -123,8 +123,8 @@ entry:
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/inlining-files.ll b/llvm/test/DebugInfo/COFF/inlining-files.ll
index 37edc6b..febd2a0 100644
--- a/llvm/test/DebugInfo/COFF/inlining-files.ll
+++ b/llvm/test/DebugInfo/COFF/inlining-files.ll
@@ -77,7 +77,7 @@ entry:
ret void, !dbg !30
}
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!8, !9, !10}
diff --git a/llvm/test/DebugInfo/COFF/inlining-header.ll b/llvm/test/DebugInfo/COFF/inlining-header.ll
index 9a8200c..8580faa 100644
--- a/llvm/test/DebugInfo/COFF/inlining-header.ll
+++ b/llvm/test/DebugInfo/COFF/inlining-header.ll
@@ -137,7 +137,7 @@ entry:
ret i32 %5, !dbg !32
}
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!8, !9, !10}
diff --git a/llvm/test/DebugInfo/COFF/inlining-levels.ll b/llvm/test/DebugInfo/COFF/inlining-levels.ll
index 8af1251..473b2df 100644
--- a/llvm/test/DebugInfo/COFF/inlining-levels.ll
+++ b/llvm/test/DebugInfo/COFF/inlining-levels.ll
@@ -59,7 +59,7 @@ entry:
ret i32 0, !dbg !28
}
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!8, !9, !10}
diff --git a/llvm/test/DebugInfo/COFF/inlining-padding.ll b/llvm/test/DebugInfo/COFF/inlining-padding.ll
index 75254b3..eeba289 100644
--- a/llvm/test/DebugInfo/COFF/inlining-padding.ll
+++ b/llvm/test/DebugInfo/COFF/inlining-padding.ll
@@ -73,7 +73,7 @@ entry:
ret i32 0, !dbg !29
}
-attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/DebugInfo/COFF/inlining.ll b/llvm/test/DebugInfo/COFF/inlining.ll
index 6953abc..901f53c 100644
--- a/llvm/test/DebugInfo/COFF/inlining.ll
+++ b/llvm/test/DebugInfo/COFF/inlining.ll
@@ -248,7 +248,7 @@ declare void @llvm.lifetime.start(i64, ptr nocapture) #1
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end(i64, ptr nocapture) #1
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/lambda.ll b/llvm/test/DebugInfo/COFF/lambda.ll
index ad7f954..7cd782d 100644
--- a/llvm/test/DebugInfo/COFF/lambda.ll
+++ b/llvm/test/DebugInfo/COFF/lambda.ll
@@ -103,9 +103,9 @@ entry:
ret i32 %cond, !dbg !32
}
-attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/DebugInfo/COFF/lexicalblock.ll b/llvm/test/DebugInfo/COFF/lexicalblock.ll
index 40dd8f8..efcd61f 100644
--- a/llvm/test/DebugInfo/COFF/lexicalblock.ll
+++ b/llvm/test/DebugInfo/COFF/lexicalblock.ll
@@ -199,7 +199,7 @@ declare i32 @llvm.expect.i32(i32, i32) #3
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
-attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { argmemonly nounwind }
attributes #3 = { nounwind readnone }
diff --git a/llvm/test/DebugInfo/COFF/lines-difile.ll b/llvm/test/DebugInfo/COFF/lines-difile.ll
index 4c8e50b..973a2af 100644
--- a/llvm/test/DebugInfo/COFF/lines-difile.ll
+++ b/llvm/test/DebugInfo/COFF/lines-difile.ll
@@ -68,9 +68,9 @@ entry:
ret void
}
-attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!13, !14, !15, !16}
diff --git a/llvm/test/DebugInfo/COFF/local-constant.ll b/llvm/test/DebugInfo/COFF/local-constant.ll
index cf306e7..a4f4c25 100644
--- a/llvm/test/DebugInfo/COFF/local-constant.ll
+++ b/llvm/test/DebugInfo/COFF/local-constant.ll
@@ -39,8 +39,8 @@ declare void @"\01?useint@@YAXH@Z"(i32) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/local-variable-gap.ll b/llvm/test/DebugInfo/COFF/local-variable-gap.ll
index bc1c313..ea7c9fd 100644
--- a/llvm/test/DebugInfo/COFF/local-variable-gap.ll
+++ b/llvm/test/DebugInfo/COFF/local-variable-gap.ll
@@ -133,9 +133,9 @@ declare void @use(i32) local_unnamed_addr #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #3
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #2 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #3 = { nounwind readnone }
attributes #4 = { nounwind }
attributes #5 = { noreturn nounwind }
diff --git a/llvm/test/DebugInfo/COFF/local-variables.ll b/llvm/test/DebugInfo/COFF/local-variables.ll
index 820f6bd..efcf5c3 100644
--- a/llvm/test/DebugInfo/COFF/local-variables.ll
+++ b/llvm/test/DebugInfo/COFF/local-variables.ll
@@ -250,9 +250,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare void @capture(ptr) #2
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/long-name.ll b/llvm/test/DebugInfo/COFF/long-name.ll
index 4ea5b43..0c3347a 100644
--- a/llvm/test/DebugInfo/COFF/long-name.ll
+++ b/llvm/test/DebugInfo/COFF/long-name.ll
@@ -36,7 +36,7 @@ entry:
ret void, !dbg !10
}
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/COFF/multifile.ll b/llvm/test/DebugInfo/COFF/multifile.ll
index 5123b1d..5f54db3 100644
--- a/llvm/test/DebugInfo/COFF/multifile.ll
+++ b/llvm/test/DebugInfo/COFF/multifile.ll
@@ -215,8 +215,8 @@ entry:
declare void @g() #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
diff --git a/llvm/test/DebugInfo/COFF/multifunction.ll b/llvm/test/DebugInfo/COFF/multifunction.ll
index 04c6436..2b753f7 100644
--- a/llvm/test/DebugInfo/COFF/multifunction.ll
+++ b/llvm/test/DebugInfo/COFF/multifunction.ll
@@ -680,8 +680,8 @@ entry:
ret void, !dbg !21
}
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !12}
diff --git a/llvm/test/DebugInfo/COFF/nrvo.ll b/llvm/test/DebugInfo/COFF/nrvo.ll
index db16c11..6e25477 100644
--- a/llvm/test/DebugInfo/COFF/nrvo.ll
+++ b/llvm/test/DebugInfo/COFF/nrvo.ll
@@ -96,9 +96,9 @@ entry:
ret i32 %0, !dbg !38
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/DebugInfo/COFF/parameter-order.ll b/llvm/test/DebugInfo/COFF/parameter-order.ll
index 23224aa..d240d61 100644
--- a/llvm/test/DebugInfo/COFF/parameter-order.ll
+++ b/llvm/test/DebugInfo/COFF/parameter-order.ll
@@ -81,7 +81,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/parent-type-scopes.ll b/llvm/test/DebugInfo/COFF/parent-type-scopes.ll
index 95c7a55..727b7e4 100644
--- a/llvm/test/DebugInfo/COFF/parent-type-scopes.ll
+++ b/llvm/test/DebugInfo/COFF/parent-type-scopes.ll
@@ -74,7 +74,7 @@ entry:
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/pieces.ll b/llvm/test/DebugInfo/COFF/pieces.ll
index 8e62ad0..88c3b7d 100644
--- a/llvm/test/DebugInfo/COFF/pieces.ll
+++ b/llvm/test/DebugInfo/COFF/pieces.ll
@@ -336,11 +336,11 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #4 = { nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #5 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/purge-typedef-udts.ll b/llvm/test/DebugInfo/COFF/purge-typedef-udts.ll
index 8118e0c..8da583b 100644
--- a/llvm/test/DebugInfo/COFF/purge-typedef-udts.ll
+++ b/llvm/test/DebugInfo/COFF/purge-typedef-udts.ll
@@ -60,7 +60,7 @@ entry:
ret i32 %call, !dbg !48
}
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/register-variables.ll b/llvm/test/DebugInfo/COFF/register-variables.ll
index 84c036c..c0c0cf4 100644
--- a/llvm/test/DebugInfo/COFF/register-variables.ll
+++ b/llvm/test/DebugInfo/COFF/register-variables.ll
@@ -209,8 +209,8 @@ declare void @putint(i32) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "tune-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "tune-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/retained-types.ll b/llvm/test/DebugInfo/COFF/retained-types.ll
index 799ae2b..7c8654f 100644
--- a/llvm/test/DebugInfo/COFF/retained-types.ll
+++ b/llvm/test/DebugInfo/COFF/retained-types.ll
@@ -62,7 +62,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/scopes.ll b/llvm/test/DebugInfo/COFF/scopes.ll
index b33e848..b1e27a4 100644
--- a/llvm/test/DebugInfo/COFF/scopes.ll
+++ b/llvm/test/DebugInfo/COFF/scopes.ll
@@ -102,7 +102,7 @@ entry:
ret void, !dbg !34
}
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!13}
diff --git a/llvm/test/DebugInfo/COFF/simple.ll b/llvm/test/DebugInfo/COFF/simple.ll
index f81de98..d450bd0 100644
--- a/llvm/test/DebugInfo/COFF/simple.ll
+++ b/llvm/test/DebugInfo/COFF/simple.ll
@@ -284,8 +284,8 @@ entry:
declare void @g() #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
diff --git a/llvm/test/DebugInfo/COFF/static-methods.ll b/llvm/test/DebugInfo/COFF/static-methods.ll
index 4ec6aa6..f0c7e89 100644
--- a/llvm/test/DebugInfo/COFF/static-methods.ll
+++ b/llvm/test/DebugInfo/COFF/static-methods.ll
@@ -98,8 +98,8 @@ entry:
ret void
}
-attributes #0 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { builtin }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll b/llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
index 896e760..a298d0b 100644
--- a/llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
+++ b/llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
@@ -50,8 +50,8 @@ entry:
declare void @"\01?foo@@YAXXZ"() #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/thunk.ll b/llvm/test/DebugInfo/COFF/thunk.ll
index 18fbeac..7ab96c8 100644
--- a/llvm/test/DebugInfo/COFF/thunk.ll
+++ b/llvm/test/DebugInfo/COFF/thunk.ll
@@ -469,13 +469,13 @@ entry:
ret i1 true, !dbg !102
}
-attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #3 = { argmemonly nounwind }
-attributes #4 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #5 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "thunk" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #6 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #5 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "thunk" "use-soft-float"="false" }
+attributes #6 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #7 = { builtin }
attributes #8 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/type-quals.ll b/llvm/test/DebugInfo/COFF/type-quals.ll
index de36545..bca2590 100644
--- a/llvm/test/DebugInfo/COFF/type-quals.ll
+++ b/llvm/test/DebugInfo/COFF/type-quals.ll
@@ -477,7 +477,7 @@ entry:
ret i32 1, !dbg !86
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { argmemonly nounwind }
diff --git a/llvm/test/DebugInfo/COFF/types-array.ll b/llvm/test/DebugInfo/COFF/types-array.ll
index 5dec93d..a197aeb 100644
--- a/llvm/test/DebugInfo/COFF/types-array.ll
+++ b/llvm/test/DebugInfo/COFF/types-array.ll
@@ -108,10 +108,10 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture readonly, i32,
declare void @"\01?usevars@@YAXHZZ"(i32, ...) #3
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { argmemonly nounwind }
-attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/DebugInfo/COFF/types-basic.ll b/llvm/test/DebugInfo/COFF/types-basic.ll
index 897c632..80be1d6 100644
--- a/llvm/test/DebugInfo/COFF/types-basic.ll
+++ b/llvm/test/DebugInfo/COFF/types-basic.ll
@@ -626,10 +626,10 @@ entry:
ret void, !dbg !96
}
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/COFF/types-calling-conv.ll b/llvm/test/DebugInfo/COFF/types-calling-conv.ll
index 7e51890..1cbebd3 100644
--- a/llvm/test/DebugInfo/COFF/types-calling-conv.ll
+++ b/llvm/test/DebugInfo/COFF/types-calling-conv.ll
@@ -208,8 +208,8 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/types-cvarargs.ll b/llvm/test/DebugInfo/COFF/types-cvarargs.ll
index 95a747f..def9d01 100644
--- a/llvm/test/DebugInfo/COFF/types-cvarargs.ll
+++ b/llvm/test/DebugInfo/COFF/types-cvarargs.ll
@@ -73,9 +73,9 @@ entry:
ret i32 1, !dbg !35
}
-attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!12, !13, !14, !15}
diff --git a/llvm/test/DebugInfo/COFF/types-data-members.ll b/llvm/test/DebugInfo/COFF/types-data-members.ll
index af0af47..2c980f5 100644
--- a/llvm/test/DebugInfo/COFF/types-data-members.ll
+++ b/llvm/test/DebugInfo/COFF/types-data-members.ll
@@ -816,9 +816,9 @@ ctor.skip_vbases: ; preds = %ctor.init_vbases, %
ret ptr %5, !dbg !62
}
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll b/llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll
index f931b46..0ba2dfe 100644
--- a/llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll
+++ b/llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll
@@ -41,7 +41,7 @@ entry:
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/types-recursive-struct.ll b/llvm/test/DebugInfo/COFF/types-recursive-struct.ll
index bd4faf0..8f5e0ba 100644
--- a/llvm/test/DebugInfo/COFF/types-recursive-struct.ll
+++ b/llvm/test/DebugInfo/COFF/types-recursive-struct.ll
@@ -157,7 +157,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll b/llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll
index 3627253..98ade40 100644
--- a/llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll
+++ b/llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll
@@ -43,7 +43,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/udts.ll b/llvm/test/DebugInfo/COFF/udts.ll
index 86d2c7a..7a3c351 100644
--- a/llvm/test/DebugInfo/COFF/udts.ll
+++ b/llvm/test/DebugInfo/COFF/udts.ll
@@ -124,7 +124,7 @@ define float @"\01?g@@YAMPEAUS@@@Z"(ptr) #0 !dbg !38 {
ret float %6, !dbg !56
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/unnamed.ll b/llvm/test/DebugInfo/COFF/unnamed.ll
index 06ef86a..396de7a 100644
--- a/llvm/test/DebugInfo/COFF/unnamed.ll
+++ b/llvm/test/DebugInfo/COFF/unnamed.ll
@@ -153,7 +153,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/vframe-csr.ll b/llvm/test/DebugInfo/COFF/vframe-csr.ll
index 866b5e6..f46965a 100644
--- a/llvm/test/DebugInfo/COFF/vframe-csr.ll
+++ b/llvm/test/DebugInfo/COFF/vframe-csr.ll
@@ -136,10 +136,10 @@ declare dso_local void @usecsrs(i32, i32) local_unnamed_addr #3
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #4 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/vframe-fpo.ll b/llvm/test/DebugInfo/COFF/vframe-fpo.ll
index e8ad8bd..146056f 100644
--- a/llvm/test/DebugInfo/COFF/vframe-fpo.ll
+++ b/llvm/test/DebugInfo/COFF/vframe-fpo.ll
@@ -224,10 +224,10 @@ declare dso_local void @"?g@@YAXAAH00@Z"(ptr dereferenceable(4), ptr dereference
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
-attributes #0 = { norecurse optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind readnone speculatable }
-attributes #3 = { optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #4 = { nounwind }
attributes #5 = { optsize }
diff --git a/llvm/test/DebugInfo/COFF/vftables.ll b/llvm/test/DebugInfo/COFF/vftables.ll
index 1f8db17..b93fefa 100644
--- a/llvm/test/DebugInfo/COFF/vftables.ll
+++ b/llvm/test/DebugInfo/COFF/vftables.ll
@@ -414,11 +414,11 @@ entry:
declare void @"\01?f@D@@UEAAXXZ"(ptr) unnamed_addr #3
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { inlinehint nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { inlinehint nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #4 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #5 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/virtual-method-kinds.ll b/llvm/test/DebugInfo/COFF/virtual-method-kinds.ll
index 5180bae..daa4b9c 100644
--- a/llvm/test/DebugInfo/COFF/virtual-method-kinds.ll
+++ b/llvm/test/DebugInfo/COFF/virtual-method-kinds.ll
@@ -217,11 +217,11 @@ entry:
ret void
}
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nobuiltin "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { nobuiltin "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #3 = { nounwind readnone }
-attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #5 = { builtin }
attributes #6 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/virtual-methods.ll b/llvm/test/DebugInfo/COFF/virtual-methods.ll
index 8f06912..930f96d 100644
--- a/llvm/test/DebugInfo/COFF/virtual-methods.ll
+++ b/llvm/test/DebugInfo/COFF/virtual-methods.ll
@@ -328,11 +328,11 @@ entry:
ret i32 %2, !dbg !102
}
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #5 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/vtable-optzn-array.ll b/llvm/test/DebugInfo/COFF/vtable-optzn-array.ll
index eb92d6c..718033f 100644
--- a/llvm/test/DebugInfo/COFF/vtable-optzn-array.ll
+++ b/llvm/test/DebugInfo/COFF/vtable-optzn-array.ll
@@ -89,8 +89,8 @@ entry:
ret void
}
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { nounwind }
attributes #3 = { nounwind readnone }
diff --git a/llvm/test/DebugInfo/Generic/PR20038.ll b/llvm/test/DebugInfo/Generic/PR20038.ll
index 024a6ab..d4c3f85 100644
--- a/llvm/test/DebugInfo/Generic/PR20038.ll
+++ b/llvm/test/DebugInfo/Generic/PR20038.ll
@@ -109,8 +109,8 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/block-asan.ll b/llvm/test/DebugInfo/Generic/block-asan.ll
index db49289..58b20e2 100644
--- a/llvm/test/DebugInfo/Generic/block-asan.ll
+++ b/llvm/test/DebugInfo/Generic/block-asan.ll
@@ -47,9 +47,9 @@ declare void @bar(i32) #2
declare void @_Block_object_dispose(ptr, i32)
-attributes #0 = { nounwind ssp uwtable sanitize_address "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable sanitize_address "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/constant-pointers.ll b/llvm/test/DebugInfo/Generic/constant-pointers.ll
index 1e69109..772b29b 100644
--- a/llvm/test/DebugInfo/Generic/constant-pointers.ll
+++ b/llvm/test/DebugInfo/Generic/constant-pointers.ll
@@ -22,7 +22,7 @@ entry:
ret void, !dbg !18
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!15, !16}
diff --git a/llvm/test/DebugInfo/Generic/cross-cu-inlining.ll b/llvm/test/DebugInfo/Generic/cross-cu-inlining.ll
index 91b481c..3922d46 100644
--- a/llvm/test/DebugInfo/Generic/cross-cu-inlining.ll
+++ b/llvm/test/DebugInfo/Generic/cross-cu-inlining.ll
@@ -101,8 +101,8 @@ declare void @llvm.lifetime.start(i64, ptr nocapture) #3
; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, ptr nocapture) #3
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll b/llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll
index 7030157..023c34b 100644
--- a/llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll
+++ b/llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll
@@ -41,7 +41,7 @@ define linkonce_odr i32 @_Z4funci(i32 %i) #0 !dbg !19 {
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!9, !13}
diff --git a/llvm/test/DebugInfo/Generic/cu-range-hole.ll b/llvm/test/DebugInfo/Generic/cu-range-hole.ll
index 4a6a753..397cfcc 100644
--- a/llvm/test/DebugInfo/Generic/cu-range-hole.ll
+++ b/llvm/test/DebugInfo/Generic/cu-range-hole.ll
@@ -47,7 +47,7 @@ entry:
ret i32 %add, !dbg !16
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.ident = !{!0, !0}
diff --git a/llvm/test/DebugInfo/Generic/cu-ranges.ll b/llvm/test/DebugInfo/Generic/cu-ranges.ll
index b962bce..dbb50d4 100644
--- a/llvm/test/DebugInfo/Generic/cu-ranges.ll
+++ b/llvm/test/DebugInfo/Generic/cu-ranges.ll
@@ -41,7 +41,7 @@ entry:
ret i32 %add, !dbg !18
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/dead-argument-order.ll b/llvm/test/DebugInfo/Generic/dead-argument-order.ll
index f6cd809..b4b4cfd 100644
--- a/llvm/test/DebugInfo/Generic/dead-argument-order.ll
+++ b/llvm/test/DebugInfo/Generic/dead-argument-order.ll
@@ -48,7 +48,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/debug-info-always-inline.ll b/llvm/test/DebugInfo/Generic/debug-info-always-inline.ll
index f433838..13bbdc4 100644
--- a/llvm/test/DebugInfo/Generic/debug-info-always-inline.ll
+++ b/llvm/test/DebugInfo/Generic/debug-info-always-inline.ll
@@ -104,10 +104,10 @@ entry:
declare void @_Z3barv() #3
-attributes #0 = { alwaysinline nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { alwaysinline nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !12}
diff --git a/llvm/test/DebugInfo/Generic/def-line.ll b/llvm/test/DebugInfo/Generic/def-line.ll
index 73d8c08..bec3a88 100644
--- a/llvm/test/DebugInfo/Generic/def-line.ll
+++ b/llvm/test/DebugInfo/Generic/def-line.ll
@@ -59,8 +59,8 @@ entry:
ret void, !dbg !22
}
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!16, !17}
diff --git a/llvm/test/DebugInfo/Generic/directives-only.ll b/llvm/test/DebugInfo/Generic/directives-only.ll
index 4754df7..f3c3555 100644
--- a/llvm/test/DebugInfo/Generic/directives-only.ll
+++ b/llvm/test/DebugInfo/Generic/directives-only.ll
@@ -34,8 +34,8 @@ entry:
declare void @f1(...) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/discriminator.ll b/llvm/test/DebugInfo/Generic/discriminator.ll
index e51ce91..4b0993c 100644
--- a/llvm/test/DebugInfo/Generic/discriminator.ll
+++ b/llvm/test/DebugInfo/Generic/discriminator.ll
@@ -24,8 +24,8 @@ define void @_Z3foov() #0 !dbg !4 {
declare void @_Z3xyzv() #1
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/enum-types.ll b/llvm/test/DebugInfo/Generic/enum-types.ll
index 8af9b21..6926c78 100644
--- a/llvm/test/DebugInfo/Generic/enum-types.ll
+++ b/llvm/test/DebugInfo/Generic/enum-types.ll
@@ -40,7 +40,7 @@ entry:
ret void, !dbg !27
}
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0, !12}
diff --git a/llvm/test/DebugInfo/Generic/enum.ll b/llvm/test/DebugInfo/Generic/enum.ll
index 63665e3..22656d3 100644
--- a/llvm/test/DebugInfo/Generic/enum.ll
+++ b/llvm/test/DebugInfo/Generic/enum.ll
@@ -47,7 +47,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!8}
diff --git a/llvm/test/DebugInfo/Generic/extended-loc-directive.ll b/llvm/test/DebugInfo/Generic/extended-loc-directive.ll
index 901830d..dbd98a5 100644
--- a/llvm/test/DebugInfo/Generic/extended-loc-directive.ll
+++ b/llvm/test/DebugInfo/Generic/extended-loc-directive.ll
@@ -46,8 +46,8 @@ entry:
declare void @f1(...) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/global-sra-array.ll b/llvm/test/DebugInfo/Generic/global-sra-array.ll
index 8076efd..d2e363c 100644
--- a/llvm/test/DebugInfo/Generic/global-sra-array.ll
+++ b/llvm/test/DebugInfo/Generic/global-sra-array.ll
@@ -70,7 +70,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { optsize }
diff --git a/llvm/test/DebugInfo/Generic/global.ll b/llvm/test/DebugInfo/Generic/global.ll
index ddb3f8b..152f3de 100644
--- a/llvm/test/DebugInfo/Generic/global.ll
+++ b/llvm/test/DebugInfo/Generic/global.ll
@@ -21,7 +21,7 @@ entry:
ret i32 0, !dbg !12
}
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll
index 253b7c3..c91ec40 100644
--- a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll
+++ b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll
@@ -325,9 +325,9 @@ declare void @__asan_unregister_globals(i64, i64)
declare void @__sanitizer_cov_module_init(i64)
-attributes #0 = { noreturn sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #3 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll
index 3475533..9399eb7 100644
--- a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll
+++ b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll
@@ -47,8 +47,8 @@ declare i32 @_Z4funcv() #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/inline-no-debug-info.ll b/llvm/test/DebugInfo/Generic/inline-no-debug-info.ll
index 88d8794..f1db6c0 100644
--- a/llvm/test/DebugInfo/Generic/inline-no-debug-info.ll
+++ b/llvm/test/DebugInfo/Generic/inline-no-debug-info.ll
@@ -49,7 +49,7 @@ entry:
ret void, !dbg !12
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/inline-scopes.ll b/llvm/test/DebugInfo/Generic/inline-scopes.ll
index 45ecdd0..4058487 100644
--- a/llvm/test/DebugInfo/Generic/inline-scopes.ll
+++ b/llvm/test/DebugInfo/Generic/inline-scopes.ll
@@ -98,9 +98,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare zeroext i1 @_Z1fv() #2
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!13, !14}
diff --git a/llvm/test/DebugInfo/Generic/inlined-arguments.ll b/llvm/test/DebugInfo/Generic/inlined-arguments.ll
index a802384..3b0ee2a 100644
--- a/llvm/test/DebugInfo/Generic/inlined-arguments.ll
+++ b/llvm/test/DebugInfo/Generic/inlined-arguments.ll
@@ -41,8 +41,8 @@ declare void @_Z2f3i(i32) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/inlined-strings.ll b/llvm/test/DebugInfo/Generic/inlined-strings.ll
index ea68c9f..64484a3 100644
--- a/llvm/test/DebugInfo/Generic/inlined-strings.ll
+++ b/llvm/test/DebugInfo/Generic/inlined-strings.ll
@@ -22,7 +22,7 @@ entry:
ret i32 0, !dbg !12
}
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/Generic/lto-comp-dir.ll b/llvm/test/DebugInfo/Generic/lto-comp-dir.ll
index 873a18f..05dbb24 100644
--- a/llvm/test/DebugInfo/Generic/lto-comp-dir.ll
+++ b/llvm/test/DebugInfo/Generic/lto-comp-dir.ll
@@ -57,8 +57,8 @@ entry:
ret i32 0, !dbg !21
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0, !8}
!llvm.module.flags = !{!16, !17}
diff --git a/llvm/test/DebugInfo/Generic/mainsubprogram.ll b/llvm/test/DebugInfo/Generic/mainsubprogram.ll
index 24a3a7a..30b8c1f 100644
--- a/llvm/test/DebugInfo/Generic/mainsubprogram.ll
+++ b/llvm/test/DebugInfo/Generic/mainsubprogram.ll
@@ -15,7 +15,7 @@ entry:
ret i32 0, !dbg !10
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/Generic/member-order.ll b/llvm/test/DebugInfo/Generic/member-order.ll
index a2965cc..805e1e8 100644
--- a/llvm/test/DebugInfo/Generic/member-order.ll
+++ b/llvm/test/DebugInfo/Generic/member-order.ll
@@ -35,7 +35,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/multiline.ll b/llvm/test/DebugInfo/Generic/multiline.ll
index 12476db..be3f10b 100644
--- a/llvm/test/DebugInfo/Generic/multiline.ll
+++ b/llvm/test/DebugInfo/Generic/multiline.ll
@@ -52,8 +52,8 @@ entry:
declare void @f1(...) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/namespace.ll b/llvm/test/DebugInfo/Generic/namespace.ll
index 2db69f1..48b4093 100644
--- a/llvm/test/DebugInfo/Generic/namespace.ll
+++ b/llvm/test/DebugInfo/Generic/namespace.ll
@@ -266,7 +266,7 @@ entry:
ret void, !dbg !74
}
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/namespace_function_definition.ll b/llvm/test/DebugInfo/Generic/namespace_function_definition.ll
index 5c5543f..f69bc07 100644
--- a/llvm/test/DebugInfo/Generic/namespace_function_definition.ll
+++ b/llvm/test/DebugInfo/Generic/namespace_function_definition.ll
@@ -22,7 +22,7 @@ entry:
ret void, !dbg !11
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll b/llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll
index d9ad39b..3960aca 100644
--- a/llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll
+++ b/llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll
@@ -60,8 +60,8 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/recursive_inlining.ll b/llvm/test/DebugInfo/Generic/recursive_inlining.ll
index 908e408..3174fc3 100644
--- a/llvm/test/DebugInfo/Generic/recursive_inlining.ll
+++ b/llvm/test/DebugInfo/Generic/recursive_inlining.ll
@@ -186,8 +186,8 @@ declare void @_Z3fn2iiii(i32, i32, i32, i32) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/Generic/restrict.ll b/llvm/test/DebugInfo/Generic/restrict.ll
index 0131e53..f508ec6 100644
--- a/llvm/test/DebugInfo/Generic/restrict.ll
+++ b/llvm/test/DebugInfo/Generic/restrict.ll
@@ -26,7 +26,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/tu-composite.ll b/llvm/test/DebugInfo/Generic/tu-composite.ll
index bcfe049..a880ac5f 100644
--- a/llvm/test/DebugInfo/Generic/tu-composite.ll
+++ b/llvm/test/DebugInfo/Generic/tu-composite.ll
@@ -115,7 +115,7 @@ entry:
ret void, !dbg !58
}
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/unconditional-branch.ll b/llvm/test/DebugInfo/Generic/unconditional-branch.ll
index 9f8f975..287f08f 100644
--- a/llvm/test/DebugInfo/Generic/unconditional-branch.ll
+++ b/llvm/test/DebugInfo/Generic/unconditional-branch.ll
@@ -37,7 +37,7 @@ sw.default: ; preds = %entry
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/version.ll b/llvm/test/DebugInfo/Generic/version.ll
index b68528d..08a4f1a 100644
--- a/llvm/test/DebugInfo/Generic/version.ll
+++ b/llvm/test/DebugInfo/Generic/version.ll
@@ -11,7 +11,7 @@ entry:
ret i32 0, !dbg !10
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/Inputs/gmlt.ll b/llvm/test/DebugInfo/Inputs/gmlt.ll
index 6f112fc..c9db2d2 100644
--- a/llvm/test/DebugInfo/Inputs/gmlt.ll
+++ b/llvm/test/DebugInfo/Inputs/gmlt.ll
@@ -110,8 +110,8 @@ entry:
ret void, !dbg !19
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Inputs/line.ll b/llvm/test/DebugInfo/Inputs/line.ll
index f4d6db9..24628cf 100644
--- a/llvm/test/DebugInfo/Inputs/line.ll
+++ b/llvm/test/DebugInfo/Inputs/line.ll
@@ -33,7 +33,7 @@ land.end: ; preds = %land.rhs, %entry
ret i32 %conv, !dbg !13
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/MSP430/cu-ranges.ll b/llvm/test/DebugInfo/MSP430/cu-ranges.ll
index 5c15c31..8f7cf407 100644
--- a/llvm/test/DebugInfo/MSP430/cu-ranges.ll
+++ b/llvm/test/DebugInfo/MSP430/cu-ranges.ll
@@ -46,7 +46,7 @@ entry:
ret i32 %add, !dbg !18
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Mips/fn-call-line.ll b/llvm/test/DebugInfo/Mips/fn-call-line.ll
index c12a47b..2f02040f 100644
--- a/llvm/test/DebugInfo/Mips/fn-call-line.ll
+++ b/llvm/test/DebugInfo/Mips/fn-call-line.ll
@@ -61,8 +61,8 @@ entry:
declare void @f1(...) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
index 1f4c44e..c1288e8 100644
--- a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
+++ b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
@@ -285,7 +285,7 @@ entry:
; CHECK-NEXT: .section .debug_macinfo { }
; CHECK-NOT: debug_
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.ident = !{!0, !0}
diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll
index 4624dce..9ce0b73 100644
--- a/llvm/test/DebugInfo/NVPTX/debug-info.ll
+++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll
@@ -2675,7 +2675,7 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_20" "target-features"="+ptx42" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_20" "target-features"="+ptx42" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll b/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll
index 7a58cae..e7e09a5 100644
--- a/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll
+++ b/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll
@@ -88,9 +88,9 @@ if.end4: ; preds = %if.then2, %if.end
declare signext i8 @_ZN1A3fooEv(ptr) #2
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0, !9}
!llvm.module.flags = !{!18, !19}
diff --git a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
index 65542e8..6aaedf4 100644
--- a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
+++ b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
@@ -118,7 +118,7 @@ entry:
declare void @ext()
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+relax" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+relax" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/Sparc/gnu-window-save.ll b/llvm/test/DebugInfo/Sparc/gnu-window-save.ll
index cc2e764..3fab8dd 100644
--- a/llvm/test/DebugInfo/Sparc/gnu-window-save.ll
+++ b/llvm/test/DebugInfo/Sparc/gnu-window-save.ll
@@ -48,8 +48,8 @@ entry:
declare signext i32 @printf(ptr, ...) #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
diff --git a/llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll b/llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll
index 7178ef3..1fc1c84 100644
--- a/llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll
+++ b/llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll
@@ -79,7 +79,7 @@ for.end: ; preds = %for.cond
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll b/llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll
index 1b552b5..603f28a 100644
--- a/llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll
+++ b/llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll
@@ -33,7 +33,7 @@ entry:
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { nounwind readnone speculatable willreturn }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll b/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll
index 52b3688..ebbe357 100644
--- a/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll
+++ b/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll
@@ -56,7 +56,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll b/llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
index 81e2e33..dda757ab 100644
--- a/llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
+++ b/llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
@@ -25,7 +25,7 @@ entry:
ret i32 0, !dbg !10
}
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/X86/addr_comments.ll b/llvm/test/DebugInfo/X86/addr_comments.ll
index 58050d2..c451606 100644
--- a/llvm/test/DebugInfo/X86/addr_comments.ll
+++ b/llvm/test/DebugInfo/X86/addr_comments.ll
@@ -16,7 +16,7 @@ entry:
ret void, !dbg !10
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/X86/arguments.ll b/llvm/test/DebugInfo/X86/arguments.ll
index 767c138..69ef9a6 100644
--- a/llvm/test/DebugInfo/X86/arguments.ll
+++ b/llvm/test/DebugInfo/X86/arguments.ll
@@ -41,7 +41,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/coff_debug_info_type.ll b/llvm/test/DebugInfo/X86/coff_debug_info_type.ll
index a40b511..f70973d 100644
--- a/llvm/test/DebugInfo/X86/coff_debug_info_type.ll
+++ b/llvm/test/DebugInfo/X86/coff_debug_info_type.ll
@@ -26,7 +26,7 @@ entry:
ret i32 0, !dbg !10
}
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/X86/coff_relative_names.ll b/llvm/test/DebugInfo/X86/coff_relative_names.ll
index ea6d856..81e6dd58 100644
--- a/llvm/test/DebugInfo/X86/coff_relative_names.ll
+++ b/llvm/test/DebugInfo/X86/coff_relative_names.ll
@@ -18,7 +18,7 @@ entry:
ret i32 0, !dbg !10
}
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/X86/convert-loclist.ll b/llvm/test/DebugInfo/X86/convert-loclist.ll
index 0fb15d5..f1aa5dc 100644
--- a/llvm/test/DebugInfo/X86/convert-loclist.ll
+++ b/llvm/test/DebugInfo/X86/convert-loclist.ll
@@ -60,8 +60,8 @@ declare !dbg !4 dso_local void @_Z2f1v() local_unnamed_addr #1
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { nounwind readnone speculatable willreturn }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/cu-ranges-odr.ll b/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
index ac58e2f..d1690d3 100644
--- a/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
+++ b/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
@@ -55,7 +55,7 @@ entry:
ret void, !dbg !31
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!11}
diff --git a/llvm/test/DebugInfo/X86/cu-ranges.ll b/llvm/test/DebugInfo/X86/cu-ranges.ll
index 1afbdc5..b1b43ae 100644
--- a/llvm/test/DebugInfo/X86/cu-ranges.ll
+++ b/llvm/test/DebugInfo/X86/cu-ranges.ll
@@ -60,7 +60,7 @@ entry:
ret i32 %add, !dbg !16
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/dbg_value_direct.ll b/llvm/test/DebugInfo/X86/dbg_value_direct.ll
index 4ac2541..afd60e2 100644
--- a/llvm/test/DebugInfo/X86/dbg_value_direct.ll
+++ b/llvm/test/DebugInfo/X86/dbg_value_direct.ll
@@ -140,9 +140,9 @@ declare void @__asan_register_globals(i64, i64)
declare void @__asan_unregister_globals(i64, i64)
-attributes #0 = { sanitize_address uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { sanitize_address uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22, !27}
diff --git a/llvm/test/DebugInfo/X86/debug-dead-local-var.ll b/llvm/test/DebugInfo/X86/debug-dead-local-var.ll
index 6e87ece..6aecbfa 100644
--- a/llvm/test/DebugInfo/X86/debug-dead-local-var.ll
+++ b/llvm/test/DebugInfo/X86/debug-dead-local-var.ll
@@ -24,7 +24,7 @@ entry:
ret i32 1, !dbg !21
}
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18, !19}
diff --git a/llvm/test/DebugInfo/X86/debug-info-blocks.ll b/llvm/test/DebugInfo/X86/debug-info-blocks.ll
index 759e703..8924b30 100644
--- a/llvm/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/llvm/test/DebugInfo/X86/debug-info-blocks.ll
@@ -259,7 +259,7 @@ define i32 @main() #0 !dbg !36 {
ret i32 0, !dbg !109
}
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { nonlazybind }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/X86/debug-loc-asan.mir b/llvm/test/DebugInfo/X86/debug-loc-asan.mir
index e50bd60..e1d9651 100644
--- a/llvm/test/DebugInfo/X86/debug-loc-asan.mir
+++ b/llvm/test/DebugInfo/X86/debug-loc-asan.mir
@@ -160,7 +160,7 @@
; Function Attrs: nounwind
declare void @llvm.stackprotector(ptr, ptr) #2
- attributes #0 = { nounwind sanitize_address uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { nounwind sanitize_address uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }
diff --git a/llvm/test/DebugInfo/X86/debug-loc-offset.mir b/llvm/test/DebugInfo/X86/debug-loc-offset.mir
index 5c35626..c07ea68 100644
--- a/llvm/test/DebugInfo/X86/debug-loc-offset.mir
+++ b/llvm/test/DebugInfo/X86/debug-loc-offset.mir
@@ -133,9 +133,9 @@
; Function Attrs: nounwind
declare void @llvm.stackprotector(ptr, ptr) #3
- attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
- attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0, !3}
diff --git a/llvm/test/DebugInfo/X86/debug-ranges-offset.ll b/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
index b071225..24cc73c 100644
--- a/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
+++ b/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
@@ -189,8 +189,8 @@ declare ptr @__msan_memset(ptr, i32, i64)
; Function Attrs: nounwind
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) #3
-attributes #0 = { sanitize_memory uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nobuiltin "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { sanitize_memory uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { nobuiltin "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
attributes #4 = { builtin }
diff --git a/llvm/test/DebugInfo/X86/decl-derived-member.ll b/llvm/test/DebugInfo/X86/decl-derived-member.ll
index 8d32d3c..822bb8e 100644
--- a/llvm/test/DebugInfo/X86/decl-derived-member.ll
+++ b/llvm/test/DebugInfo/X86/decl-derived-member.ll
@@ -88,11 +88,11 @@ entry:
ret void
}
-attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { inlinehint uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { inlinehint uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind }
attributes #3 = { nounwind readnone }
-attributes #4 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!8}
!llvm.module.flags = !{!12, !13}
diff --git a/llvm/test/DebugInfo/X86/discriminator.ll b/llvm/test/DebugInfo/X86/discriminator.ll
index ef89838..0872ea8 100644
--- a/llvm/test/DebugInfo/X86/discriminator.ll
+++ b/llvm/test/DebugInfo/X86/discriminator.ll
@@ -35,7 +35,7 @@ return: ; preds = %if.end, %if.then
ret i32 %2, !dbg !13
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/X86/discriminator2.ll b/llvm/test/DebugInfo/X86/discriminator2.ll
index f966372..07cda30 100644
--- a/llvm/test/DebugInfo/X86/discriminator2.ll
+++ b/llvm/test/DebugInfo/X86/discriminator2.ll
@@ -32,8 +32,8 @@ declare void @_Z3fooii(i32, i32) #1
declare i32 @_Z3barv() #1
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/DebugInfo/X86/discriminator3.ll b/llvm/test/DebugInfo/X86/discriminator3.ll
index 19a7764..1898599 100644
--- a/llvm/test/DebugInfo/X86/discriminator3.ll
+++ b/llvm/test/DebugInfo/X86/discriminator3.ll
@@ -35,9 +35,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare void @_Z3fooi(i32) #2
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll b/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
index da0d8139..08f071d 100644
--- a/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
@@ -53,8 +53,8 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readonly uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { nounwind readonly uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
!llvm.dbg.cu = !{!4}
diff --git a/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll b/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll
index b06179d..857c1da 100644
--- a/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll
@@ -49,7 +49,7 @@ entry:
ret i32 %0, !dbg !15
}
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "use-soft-float"="false" }
!llvm.dbg.cu = !{!5}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll b/llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll
index d1f1ae3..bbfe2b1 100644
--- a/llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll
@@ -19,7 +19,7 @@ entry:
ret i32 0, !dbg !10
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/X86/fission-inline.ll b/llvm/test/DebugInfo/X86/fission-inline.ll
index d038b07..13ab43f 100644
--- a/llvm/test/DebugInfo/X86/fission-inline.ll
+++ b/llvm/test/DebugInfo/X86/fission-inline.ll
@@ -90,8 +90,8 @@ entry:
declare void @_Z2f1v() #1
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22, !23}
diff --git a/llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll b/llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll
index 6c9885f..447601c 100644
--- a/llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll
+++ b/llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll
@@ -46,8 +46,8 @@ entry:
declare void @_Z2f1v() #1
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22, !23}
diff --git a/llvm/test/DebugInfo/X86/fission-no-inlining.ll b/llvm/test/DebugInfo/X86/fission-no-inlining.ll
index 7ae5c2b..a4a3ac6 100644
--- a/llvm/test/DebugInfo/X86/fission-no-inlining.ll
+++ b/llvm/test/DebugInfo/X86/fission-no-inlining.ll
@@ -22,8 +22,8 @@ entry:
declare void @_Z2f1v() #1
-attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}
diff --git a/llvm/test/DebugInfo/X86/fission-ranges.ll b/llvm/test/DebugInfo/X86/fission-ranges.ll
index 10d2150c..be0a375 100644
--- a/llvm/test/DebugInfo/X86/fission-ranges.ll
+++ b/llvm/test/DebugInfo/X86/fission-ranges.ll
@@ -187,7 +187,7 @@ for.end18: ; preds = %for.inc16
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/generate-odr-hash.ll b/llvm/test/DebugInfo/X86/generate-odr-hash.ll
index 5e583a3..ad083ab 100644
--- a/llvm/test/DebugInfo/X86/generate-odr-hash.ll
+++ b/llvm/test/DebugInfo/X86/generate-odr-hash.ll
@@ -215,7 +215,7 @@ entry:
ret void, !dbg !57
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!34}
diff --git a/llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll b/llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
index f80b3ac..d33d357 100644
--- a/llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
+++ b/llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
@@ -60,7 +60,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll b/llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll
index 727a3c3..d01184d 100644
--- a/llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll
+++ b/llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll
@@ -16,7 +16,7 @@ define dso_local void @_Z2f2v() #0 !dbg !10 {
ret void, !dbg !11
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll b/llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll
index 3984ad4..adac419 100644
--- a/llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll
+++ b/llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll
@@ -39,8 +39,8 @@ entry:
declare void @_Z2f1v() #1
-attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/X86/gnu-public-names.ll b/llvm/test/DebugInfo/X86/gnu-public-names.ll
index 5dd6449..6b1279b 100644
--- a/llvm/test/DebugInfo/X86/gnu-public-names.ll
+++ b/llvm/test/DebugInfo/X86/gnu-public-names.ll
@@ -304,7 +304,7 @@ entry:
ret i32 %add5, !dbg !90
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/X86/inline-member-function.ll b/llvm/test/DebugInfo/X86/inline-member-function.ll
index ecb65e9..dee0c82 100644
--- a/llvm/test/DebugInfo/X86/inline-member-function.ll
+++ b/llvm/test/DebugInfo/X86/inline-member-function.ll
@@ -60,7 +60,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!4}
diff --git a/llvm/test/DebugInfo/X86/inline-seldag-test.ll b/llvm/test/DebugInfo/X86/inline-seldag-test.ll
index 6a62d2b..874a01e 100644
--- a/llvm/test/DebugInfo/X86/inline-seldag-test.ll
+++ b/llvm/test/DebugInfo/X86/inline-seldag-test.ll
@@ -41,7 +41,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/lexical_block.ll b/llvm/test/DebugInfo/X86/lexical_block.ll
index 8063427..08df740 100644
--- a/llvm/test/DebugInfo/X86/lexical_block.ll
+++ b/llvm/test/DebugInfo/X86/lexical_block.ll
@@ -40,7 +40,7 @@ if.end: ; preds = %if.then, %entry
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/line-info.ll b/llvm/test/DebugInfo/X86/line-info.ll
index 5884ee1..ac208f3 100644
--- a/llvm/test/DebugInfo/X86/line-info.ll
+++ b/llvm/test/DebugInfo/X86/line-info.ll
@@ -32,7 +32,7 @@ entry:
ret i32 0, !dbg !17
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/low-pc-cu.ll b/llvm/test/DebugInfo/X86/low-pc-cu.ll
index e56c7b9..4be5014 100644
--- a/llvm/test/DebugInfo/X86/low-pc-cu.ll
+++ b/llvm/test/DebugInfo/X86/low-pc-cu.ll
@@ -26,7 +26,7 @@ entry:
ret void, !dbg !11
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/X86/mi-print.ll b/llvm/test/DebugInfo/X86/mi-print.ll
index 8067006..b35b0d2 100644
--- a/llvm/test/DebugInfo/X86/mi-print.ll
+++ b/llvm/test/DebugInfo/X86/mi-print.ll
@@ -24,7 +24,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/missing-abstract-variable.ll b/llvm/test/DebugInfo/X86/missing-abstract-variable.ll
index 2136380..c31c442 100644
--- a/llvm/test/DebugInfo/X86/missing-abstract-variable.ll
+++ b/llvm/test/DebugInfo/X86/missing-abstract-variable.ll
@@ -122,8 +122,8 @@ declare void @_Z1fi(i32) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/no_debug_ranges.ll b/llvm/test/DebugInfo/X86/no_debug_ranges.ll
index b0fd815..f961e45 100644
--- a/llvm/test/DebugInfo/X86/no_debug_ranges.ll
+++ b/llvm/test/DebugInfo/X86/no_debug_ranges.ll
@@ -33,7 +33,7 @@ entry:
ret void, !dbg !12
}
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/X86/nodebug.ll b/llvm/test/DebugInfo/X86/nodebug.ll
index 5495886f..5dbe65b 100644
--- a/llvm/test/DebugInfo/X86/nodebug.ll
+++ b/llvm/test/DebugInfo/X86/nodebug.ll
@@ -35,7 +35,7 @@ entry:
ret void
}
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll b/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
index 6bcc349..e945fcc 100644
--- a/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
+++ b/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
@@ -84,8 +84,8 @@ declare void @llvm.lifetime.start(i64, ptr nocapture) #3
; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, ptr nocapture) #3
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/X86/objc-property-void.ll b/llvm/test/DebugInfo/X86/objc-property-void.ll
index 4c5c16a..431ea00 100644
--- a/llvm/test/DebugInfo/X86/objc-property-void.ll
+++ b/llvm/test/DebugInfo/X86/objc-property-void.ll
@@ -66,7 +66,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/pieces-4.ll b/llvm/test/DebugInfo/X86/pieces-4.ll
index 7e27f10..46f6201 100644
--- a/llvm/test/DebugInfo/X86/pieces-4.ll
+++ b/llvm/test/DebugInfo/X86/pieces-4.ll
@@ -53,9 +53,9 @@ declare i32 @g() local_unnamed_addr #2
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/pr19307.mir b/llvm/test/DebugInfo/X86/pr19307.mir
index 4ba1ca0..05e0ba5 100644
--- a/llvm/test/DebugInfo/X86/pr19307.mir
+++ b/llvm/test/DebugInfo/X86/pr19307.mir
@@ -77,9 +77,9 @@
; Function Attrs: nounwind
declare void @llvm.stackprotector(ptr, ptr) #3
- attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
- attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/pr28270.ll b/llvm/test/DebugInfo/X86/pr28270.ll
index e537be5..6e6ffbc 100644
--- a/llvm/test/DebugInfo/X86/pr28270.ll
+++ b/llvm/test/DebugInfo/X86/pr28270.ll
@@ -52,9 +52,9 @@ declare void @_ZN1AC1EPKc(ptr, ptr) unnamed_addr #2
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #3
-attributes #0 = { noreturn uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #3 = { nounwind readnone }
attributes #4 = { nounwind }
diff --git a/llvm/test/DebugInfo/X86/pr45181.ll b/llvm/test/DebugInfo/X86/pr45181.ll
index 3a46930..b973f7d 100644
--- a/llvm/test/DebugInfo/X86/pr45181.ll
+++ b/llvm/test/DebugInfo/X86/pr45181.ll
@@ -140,11 +140,11 @@ declare void @_ZN2aa1yIP1jNS_2ac1zI1eEEED1Ev(ptr) unnamed_addr #4
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-attributes #0 = { optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { argmemonly nounwind willreturn }
-attributes #3 = { optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
+attributes #4 = { nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
attributes #5 = { optsize }
attributes #6 = { nounwind }
attributes #7 = { nounwind optsize }
diff --git a/llvm/test/DebugInfo/X86/safestack-byval.ll b/llvm/test/DebugInfo/X86/safestack-byval.ll
index acaa803..5336e58 100644
--- a/llvm/test/DebugInfo/X86/safestack-byval.ll
+++ b/llvm/test/DebugInfo/X86/safestack-byval.ll
@@ -48,7 +48,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) #1
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #2
-attributes #0 = { norecurse nounwind readonly safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind readonly safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { argmemonly nounwind }
diff --git a/llvm/test/DebugInfo/X86/set.ll b/llvm/test/DebugInfo/X86/set.ll
index 2bfb08f..89e855ca 100644
--- a/llvm/test/DebugInfo/X86/set.ll
+++ b/llvm/test/DebugInfo/X86/set.ll
@@ -67,7 +67,7 @@ if_1: ; preds = %else_1, %second
ret ptr @M_Main, !dbg !45
}
-attributes #0 = { "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/spill-nospill.ll b/llvm/test/DebugInfo/X86/spill-nospill.ll
index 94e47ae..5b08a4bd 100644
--- a/llvm/test/DebugInfo/X86/spill-nospill.ll
+++ b/llvm/test/DebugInfo/X86/spill-nospill.ll
@@ -75,8 +75,8 @@ declare i32 @g(i32) local_unnamed_addr #1
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/X86/sret.ll b/llvm/test/DebugInfo/X86/sret.ll
index 567f500..ff24504 100644
--- a/llvm/test/DebugInfo/X86/sret.ll
+++ b/llvm/test/DebugInfo/X86/sret.ll
@@ -260,11 +260,11 @@ eh.resume: ; preds = %lpad
; Function Attrs: nobuiltin nounwind
declare void @_ZdlPv(ptr) #4
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #3 = { noinline noreturn nounwind }
-attributes #4 = { nobuiltin nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nobuiltin nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #5 = { noreturn nounwind }
attributes #6 = { nounwind }
attributes #7 = { builtin nounwind }
diff --git a/llvm/test/DebugInfo/X86/tls.ll b/llvm/test/DebugInfo/X86/tls.ll
index d9c1744..f6f6b6f 100644
--- a/llvm/test/DebugInfo/X86/tls.ll
+++ b/llvm/test/DebugInfo/X86/tls.ll
@@ -106,7 +106,7 @@ entry:
ret i32 0, !dbg !18
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
!llvm.dbg.cu = !{!6}
!llvm.module.flags = !{!9, !10}
diff --git a/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll b/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll
index 02d9b92..0b2bf6b 100644
--- a/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll
+++ b/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll
@@ -42,7 +42,7 @@ entry:
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/void-typedef.ll b/llvm/test/DebugInfo/X86/void-typedef.ll
index 80f2467..c5d9c6c 100644
--- a/llvm/test/DebugInfo/X86/void-typedef.ll
+++ b/llvm/test/DebugInfo/X86/void-typedef.ll
@@ -52,7 +52,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: noreturn nounwind
declare void @llvm.trap() #2
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { noreturn nounwind }
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index 3faea99..be4e0de 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -5,7 +5,8 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX89,GFX9
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX11
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX12
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250,GFX1250-ASM
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250,GFX1250-DIS
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOSI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCI --implicit-check-not=error:
@@ -197,7 +198,8 @@ v_fract_f64_e32 v[0:1], 1.0
v_fract_f64_e32 v[0:1], lit(1.0)
// GFX11: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
// GFX12: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
-// GFX1250: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: v_fract_f64_e32 v[0:1], lit64(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00]
// GFX89: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf0,0x3f]
// SICI: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
@@ -229,15 +231,15 @@ v_cos_f16_e32 v5.l, lit(1.0)
// NOGFX89: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-v_tanh_bf16 v5, 1.0
-// GFX1250: v_tanh_bf16_e32 v5, 1.0 ; encoding: [0xf2,0x94,0x0a,0x7e]
+v_tanh_bf16 v5.l, 1.0
+// GFX1250: v_tanh_bf16_e32 v5.l, 1.0 ; encoding: [0xf2,0x94,0x0a,0x7e]
// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-v_tanh_bf16 v5, lit(1.0)
-// GFX1250: v_tanh_bf16_e32 v5, lit(0x3f80) ; encoding: [0xff,0x94,0x0a,0x7e,0x80,0x3f,0x00,0x00]
+v_tanh_bf16 v5.l, lit(1.0)
+// GFX1250: v_tanh_bf16_e32 v5.l, lit(0x3f80) ; encoding: [0xff,0x94,0x0a,0x7e,0x80,0x3f,0x00,0x00]
// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
@@ -291,11 +293,12 @@ v_cvt_pk_fp8_f16 v1.l, 1.0
// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16 v1.l, lit(1.0)
-// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x3c00) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
-// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// GFX1250-ASM: v_cvt_pk_fp8_f16 v1.l, lit(0x3c00) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
+// GFX1250-DIS: v_cvt_pk_fp8_f16 v1.l, 0x3c00 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
//---------------------------------------------------------------------------//
// fp literal, expected int operand
@@ -430,10 +433,11 @@ v_and_b32_e32 v0, 2.3509886e-70, v1
v_not_b16 v5.l, 1.0
// GFX11: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e]
-// GFX1250: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e]
-// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
-// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// GFX1250-ASM: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e]
+// GFX1250-DIS: v_not_b16_e32 v5.l, 0x3c00 ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x3c,0x00,0x00]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX89: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
v_not_b16 v5.l, lit(1.0)
// GFX11: v_not_b16_e32 v5.l, lit(0x3f800000) ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f]
@@ -501,7 +505,8 @@ v_fract_f64_e32 v[0:1], 1
v_fract_f64_e32 v[0:1], lit(1)
// GFX11: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
// GFX12: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
-// GFX1250: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: v_fract_f64_e32 v[0:1], lit64(0x1) ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
// GFX89: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x64,0x00,0x7e,0x01,0x00,0x00,0x00]
// SICI: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
@@ -678,15 +683,15 @@ v_cos_f16_e32 v5.l, lit(1)
// NOGFX89: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-v_tanh_bf16 v5, 1
-// GFX1250: v_tanh_bf16_e32 v5, 1 ; encoding: [0x81,0x94,0x0a,0x7e]
+v_tanh_bf16 v5.l, 1
+// GFX1250: v_tanh_bf16_e32 v5.l, 1 ; encoding: [0x81,0x94,0x0a,0x7e]
// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-v_tanh_bf16 v5, lit(1)
-// GFX1250: v_tanh_bf16_e32 v5, lit(0x1) ; encoding: [0xff,0x94,0x0a,0x7e,0x01,0x00,0x00,0x00]
+v_tanh_bf16 v5.l, lit(1)
+// GFX1250: v_tanh_bf16_e32 v5.l, lit(0x1) ; encoding: [0xff,0x94,0x0a,0x7e,0x01,0x00,0x00,0x00]
// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
@@ -740,11 +745,12 @@ v_cvt_pk_fp8_f16 v1.l, 1
// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
v_cvt_pk_fp8_f16 v1.l, lit(1)
-// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x1) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
-// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// GFX1250-ASM: v_cvt_pk_fp8_f16 v1.l, lit(0x1) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// GFX1250-DIS: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
//---------------------------------------------------------------------------//
// int literal, expected int operand
@@ -831,7 +837,8 @@ v_and_b32_e32 v0, -54321, v1
s_mov_b64_e32 s[0:1], 0xdeadbeef
// GFX11: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
// GFX12: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
-// GFX1250: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: s_mov_b64 s[0:1], lit64(0xdeadbeef) ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00]
// GFX89: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde]
@@ -844,7 +851,8 @@ v_and_b32_e32 v0, 0xdeadbeef, v1
s_mov_b64_e32 s[0:1], 0xffffffff
// GFX11: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
// GFX12: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
-// GFX1250: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: s_mov_b64 s[0:1], lit64(0xffffffff) ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
// GFX89: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff]
@@ -896,7 +904,8 @@ s_mov_b64 s[0:1], 1
s_mov_b64 s[0:1], lit(1)
// GFX11: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
// GFX12: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
-// GFX1250: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: s_mov_b64 s[0:1], lit64(0x1) ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
// GFX89: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
// SICI: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x04,0x80,0xbe,0x01,0x00,0x00,0x00]
@@ -1107,7 +1116,9 @@ v_trunc_f64 v[0:1], 0x1fffffff000
buffer_atomic_add v0, off, s[0:3], scc offset:4095
// GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xfd]
-// GFX12XX: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX1250-ASM: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX1250-DIS: buffer_atomic_add_u32 v0, off, s[0:3], m0 offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
// GFX89: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd]
// SICI: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xc8,0xe0,0x00,0x00,0x00,0xfd]
@@ -1241,9 +1252,11 @@ v_ceil_f16 v0, neg(vccz)
v_ceil_f16 v0, abs(scc)
// GFX11: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
-// GFX12XX: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// GFX12: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// GFX1250-ASM: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// GFX1250-DIS: v_ceil_f16_e64 v0.l, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
// GFX89: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_ceil_f64 v[5:6], |execz|
@@ -1461,10 +1474,12 @@ v_cmp_eq_i64 vcc, src_shared_base, v[0:1]
v_max_f16 v0, src_shared_base, v0
// GFX11: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x72]
-// GFX12XX: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
+// GFX12: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
+// GFX1250-ASM: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
+// GFX1250-DIS: v_max_num_f16_e32 v0.l, src_shared_base, v0.l ; encoding: [0xeb,0x00,0x00,0x62]
// GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a]
-// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-7]]:15: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
v_max_f32 v0, src_shared_base, v0
@@ -1493,18 +1508,22 @@ v_pk_add_f16 v0, src_shared_base, v0
v_ceil_f16 v0, neg(src_shared_base)
// GFX11: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
-// GFX12XX: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// GFX12: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// GFX1250-ASM: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// GFX1250-DIS: v_ceil_f16_e64 v0.l, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
// GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20]
-// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-7]]:20: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
v_ceil_f16 v0, abs(src_shared_base)
// GFX11: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
-// GFX12XX: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// GFX12: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// GFX1250-ASM: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// GFX1250-DIS: v_ceil_f16_e64 v0.l, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
// GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-7]]:20: error: src_shared_base register not available on this GPU
// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
v_ceil_f64 v[5:6], |src_shared_base|
@@ -1792,33 +1811,41 @@ v_pk_add_f16 v255, vccz, execz
v_sqrt_f32 v2, lit(123)
// GFX11: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250-ASM: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250-DIS: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
v_sqrt_f32 v2, abs(lit(123))
// GFX11: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250-ASM: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250-DIS: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
v_sqrt_f32 v2, lit(123.0)
// GFX11: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX12: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX1250-ASM: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX1250-DIS: v_sqrt_f32_e32 v2, 0x42f60000 ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
// GFX89: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42]
// SICI: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
v_sqrt_f64 v[2:3], lit(123.0)
// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
-// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: v_sqrt_f64_e32 v[2:3], lit64(0x405ec000) ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40]
// SICI: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
v_sqrt_f64 v[2:3], lit(123)
// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: v_sqrt_f64_e32 v[2:3], lit64(0x7b) ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00]
// SICI: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
index 1f5df65..72c800f 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
@@ -111,9 +111,6 @@
# CHECK: tlbie 4
0x7c 0x00 0x22 0x64
-# CHECK: tlbie 4
-0x7c 0x00 0x22 0x64
-
# CHECK: rfi
0x4c 0x00 0x00 0x64
# CHECK: rfci
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9vector.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9.txt
index 1a79648..a857168 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9vector.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9.txt
@@ -2,3 +2,9 @@
# CHECK: mtvsrdd 6, 0, 3
0x66 0x1b 0xc0 0x7c
+
+# CHECK: tlbie 8, 10
+0x64, 0x42, 0x40, 0x7d
+
+# CHECK: tlbie 8, 10, 2, 1, 0
+0x64, 0x42, 0x4a, 0x7d
diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
index f19cca8..a7434a2 100644
--- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
@@ -4,6 +4,10 @@
; The ptrtoaddr folds are also valid for pointers that have external state.
target datalayout = "pe1:64:64:64:32"
+declare void @use.i1(i1)
+declare void @use.i32(i32)
+declare void @use.i64(i64)
+
; ptrtoaddr result type is fixed, and can't be combined with integer cast.
define i32 @ptrtoaddr_trunc(ptr %p) {
; CHECK-LABEL: define i32 @ptrtoaddr_trunc(
@@ -171,3 +175,65 @@ define i128 @sub_zext_ptrtoint_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offs
%sub = sub i128 %p2.addr.ext, %p.int.ext
ret i128 %sub
}
+
+; The uses in icmp, ptrtoint, ptrtoaddr should be replaced. The one in the
+; return value should not, as the provenance differs.
+define ptr @gep_sub_ptrtoaddr_different_obj(ptr %p, ptr %p2, ptr %p3) {
+; CHECK-LABEL: define ptr @gep_sub_ptrtoaddr_different_obj(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]], ptr [[P3:%.*]]) {
+; CHECK-NEXT: [[P_ADDR:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[P2_ADDR]], [[P_ADDR]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P2]], [[P3]]
+; CHECK-NEXT: call void @use.i1(i1 [[CMP]])
+; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr [[P2]] to i64
+; CHECK-NEXT: call void @use.i64(i64 [[INT]])
+; CHECK-NEXT: [[ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT: call void @use.i64(i64 [[ADDR]])
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+ %p.addr = ptrtoaddr ptr %p to i64
+ %p2.addr = ptrtoaddr ptr %p2 to i64
+ %sub = sub i64 %p2.addr, %p.addr
+ %gep = getelementptr i8, ptr %p, i64 %sub
+ %cmp = icmp eq ptr %gep, %p3
+ call void @use.i1(i1 %cmp)
+ %int = ptrtoint ptr %gep to i64
+ call void @use.i64(i64 %int)
+ %addr = ptrtoaddr ptr %gep to i64
+ call void @use.i64(i64 %addr)
+ ret ptr %gep
+}
+
+; The use in ptrtoaddr should be replaced. The uses in ptrtoint and icmp should
+; not be replaced, as the non-address bits differ. The use in the return value
+; should not be replaced as the provenace differs.
+define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2, ptr addrspace(1) %p3) {
+; CHECK-LABEL: define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]], ptr addrspace(1) [[P3:%.*]]) {
+; CHECK-NEXT: [[P_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P]] to i32
+; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[P2_ADDR]], [[P_ADDR]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[GEP]], [[P3]]
+; CHECK-NEXT: call void @use.i1(i1 [[CMP]])
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
+; CHECK-NEXT: [[INT:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT: call void @use.i32(i32 [[INT]])
+; CHECK-NEXT: [[ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32
+; CHECK-NEXT: call void @use.i32(i32 [[ADDR]])
+; CHECK-NEXT: ret ptr addrspace(1) [[GEP]]
+;
+ %p.addr = ptrtoaddr ptr addrspace(1) %p to i32
+ %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32
+ %sub = sub i32 %p2.addr, %p.addr
+ %gep = getelementptr i8, ptr addrspace(1) %p, i32 %sub
+ %cmp = icmp eq ptr addrspace(1) %gep, %p3
+ call void @use.i1(i1 %cmp)
+ %int = ptrtoint ptr addrspace(1) %gep to i32
+ call void @use.i32(i32 %int)
+ %addr = ptrtoaddr ptr addrspace(1) %gep to i32
+ call void @use.i32(i32 %addr)
+ ret ptr addrspace(1) %gep
+}
diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
index c2e632d..b1cf0e4 100644
--- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
@@ -61,30 +61,49 @@ return:
ret i32 %p
}
-; Check that switch's of powers of two range is not reduced if default case is reachable
+; Check that switch's of powers of two range with the default case reachable is reduced
+; w/ Zbb enabled, by jumping non-power-of-two inputs to the default block.
define i32 @switch_of_powers_reachable_default(i32 %x) {
-; CHECK-LABEL: @switch_of_powers_reachable_default(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
-; CHECK-NEXT: i32 1, label [[BB1:%.*]]
-; CHECK-NEXT: i32 8, label [[BB2:%.*]]
-; CHECK-NEXT: i32 16, label [[BB3:%.*]]
-; CHECK-NEXT: i32 32, label [[BB4:%.*]]
-; CHECK-NEXT: i32 64, label [[BB5:%.*]]
-; CHECK-NEXT: ]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb2:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb3:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb4:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb5:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: return:
-; CHECK-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[P]]
+; RV64I-LABEL: @switch_of_powers_reachable_default(
+; RV64I-NEXT: entry:
+; RV64I-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
+; RV64I-NEXT: i32 1, label [[BB1:%.*]]
+; RV64I-NEXT: i32 8, label [[BB2:%.*]]
+; RV64I-NEXT: i32 16, label [[BB3:%.*]]
+; RV64I-NEXT: i32 32, label [[BB4:%.*]]
+; RV64I-NEXT: i32 64, label [[BB5:%.*]]
+; RV64I-NEXT: ]
+; RV64I: bb1:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb2:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb3:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb4:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb5:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: return:
+; RV64I-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
+; RV64I-NEXT: ret i32 [[P]]
+;
+; RV64ZBB-LABEL: @switch_of_powers_reachable_default(
+; RV64ZBB-NEXT: entry:
+; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]])
+; RV64ZBB-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; RV64ZBB-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[RETURN:%.*]]
+; RV64ZBB: entry.split:
+; RV64ZBB-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; RV64ZBB-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; RV64ZBB-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN]]
+; RV64ZBB: switch.lookup:
+; RV64ZBB-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_reachable_default, i64 0, i64 [[TMP4]]
+; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; RV64ZBB-NEXT: br label [[RETURN]]
+; RV64ZBB: return:
+; RV64ZBB-NEXT: [[P:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ -1, [[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
+; RV64ZBB-NEXT: ret i32 [[P]]
;
entry:
switch i32 %x, label %default_case [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
index 49eb199..aa95b3f 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -34,3 +34,97 @@ return:
%phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
ret i32 %phi
}
+
+define i32 @switch_of_powers_two_default_reachable(i32 %arg) {
+; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]]
+; CHECK: [[ENTRY_SPLIT]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK: [[SWITCH_LOOKUP]]:
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ 5, %[[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ switch i32 %arg, label %default_case [
+ i32 1, label %bb1
+ i32 8, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+default_case: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ 5, %default_case ]
+ ret i32 %phi
+}
+
+define i32 @switch_of_powers_two_default_reachable_multipreds(i32 %arg, i1 %cond) {
+; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable_multipreds(
+; CHECK-SAME: i32 [[ARG:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[COND]], label %[[SWITCH:.*]], label %[[RETURN:.*]]
+; CHECK: [[SWITCH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT: br i1 [[TMP1]], label %[[SWITCH_SPLIT:.*]], label %[[RETURN]]
+; CHECK: [[SWITCH_SPLIT]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]]
+; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP3]], i1 [[SWITCH_LOBIT]], i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK: [[SWITCH_LOOKUP]]:
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable_multipreds, i64 0, i64 [[TMP4]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ARG]], %[[SWITCH_SPLIT]] ], [ [[ARG]], %[[SWITCH]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ br i1 %cond, label %switch, label %default_case
+
+switch:
+ switch i32 %arg, label %default_case [
+ i32 1, label %bb1
+ i32 8, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+default_case:
+ %pn = phi i32 [ 0, %entry ], [ %arg, %switch ]
+ br label %return
+
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ]
+ ret i32 %phi
+}
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index ef6aeae..759109a 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -518,6 +518,11 @@ INSTANTIATE_TEST_SUITE_P(
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_MVE | ARM::AEK_FP |
ARM::AEK_RAS | ARM::AEK_LOB | ARM::AEK_FP16 | ARM::AEK_PACBTI,
"8.1-M.Mainline"),
+ ARMCPUTestParams<uint64_t>(
+ "star-mc3", "armv8.1-m.main", "fp-armv8-fullfp16-d16",
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_MVE | ARM::AEK_FP |
+ ARM::AEK_RAS | ARM::AEK_LOB | ARM::AEK_FP16 | ARM::AEK_PACBTI,
+ "8.1-M.Mainline"),
ARMCPUTestParams<uint64_t>("iwmmxt", "iwmmxt", "none", ARM::AEK_NONE,
"iwmmxt"),
ARMCPUTestParams<uint64_t>("xscale", "xscale", "none", ARM::AEK_NONE,
@@ -528,7 +533,7 @@ INSTANTIATE_TEST_SUITE_P(
"7-S")),
ARMCPUTestParams<uint64_t>::PrintToStringParamName);
-static constexpr unsigned NumARMCPUArchs = 94;
+static constexpr unsigned NumARMCPUArchs = 95;
TEST(TargetParserTest, testARMCPUArchList) {
SmallVector<StringRef, NumARMCPUArchs> List;
diff --git a/llvm/utils/update_mc_test_checks.py b/llvm/utils/update_mc_test_checks.py
index 67fff56..791ff0d 100755
--- a/llvm/utils/update_mc_test_checks.py
+++ b/llvm/utils/update_mc_test_checks.py
@@ -24,14 +24,16 @@ ERROR_CHECK_RE = re.compile(r"# COM: .*")
OUTPUT_SKIPPED_RE = re.compile(r"(.text)")
COMMENT = {"asm": "//", "dasm": "#"}
+SUBSTITUTIONS = [
+ ("%extract-encodings", "sed -n 's/.*encoding://p'"),
+]
+
def invoke_tool(exe, check_rc, cmd_args, testline, verbose=False):
- if isinstance(cmd_args, list):
- args = [applySubstitutions(a, substitutions) for a in cmd_args]
- else:
- args = cmd_args
+ substs = SUBSTITUTIONS + [(t, exe) for t in mc_LIKE_TOOLS]
+ args = [common.applySubstitutions(cmd, substs) for cmd in cmd_args.split("|")]
- cmd = 'echo "' + testline + '" | ' + exe + " " + args
+ cmd = 'echo "' + testline + '" | ' + exe + " " + " | ".join(args)
if verbose:
print("Command: ", cmd)
@@ -210,9 +212,6 @@ def update_test(ti: common.TestInfo):
testlines = list(dict.fromkeys(testlines))
common.debug("Valid test line found: ", len(testlines))
- run_list_size = len(run_list)
- testnum = len(testlines)
-
raw_output = []
raw_prefixes = []
for (
@@ -254,14 +253,12 @@ def update_test(ti: common.TestInfo):
prefix_set = set([prefix for p in run_list for prefix in p[0]])
common.debug("Rewriting FileCheck prefixes:", str(prefix_set))
- for test_id in range(testnum):
- input_line = testlines[test_id]
-
+ for test_id, input_line in enumerate(testlines):
# a {prefix : output, [runid] } dict
# insert output to a prefix-key dict, and do a max sorting
# to select the most-used prefix which share the same output string
p_dict = {}
- for run_id in range(run_list_size):
+ for run_id in range(len(run_list)):
out = raw_output[run_id][test_id]
if hasErr(out):
@@ -269,45 +266,34 @@ def update_test(ti: common.TestInfo):
else:
o = getOutputString(out)
- prefixes = raw_prefixes[run_id]
-
- for p in prefixes:
+ for p in raw_prefixes[run_id]:
if p not in p_dict:
p_dict[p] = o, [run_id]
- else:
- if p_dict[p] == (None, []):
- continue
+ continue
- prev_o, run_ids = p_dict[p]
- if o == prev_o:
- run_ids.append(run_id)
- p_dict[p] = o, run_ids
- else:
- # conflict, discard
- p_dict[p] = None, []
+ if p_dict[p] == (None, []):
+ continue
- p_dict_sorted = dict(sorted(p_dict.items(), key=lambda item: -len(item[1][1])))
+ prev_o, run_ids = p_dict[p]
+ if o == prev_o:
+ run_ids.append(run_id)
+ p_dict[p] = o, run_ids
+ else:
+ # conflict, discard
+ p_dict[p] = None, []
# prefix is selected and generated with most shared output lines
# each run_id can only be used once
- used_runid = set()
-
+ used_run_ids = set()
selected_prefixes = set()
- for prefix, tup in p_dict_sorted.items():
- o, run_ids = tup
-
- if len(run_ids) == 0:
- continue
-
- skip = False
- for i in run_ids:
- if i in used_runid:
- skip = True
- else:
- used_runid.add(i)
- if not skip:
+ get_num_runs = lambda item: len(item[1][1])
+ p_dict_sorted = sorted(p_dict.items(), key=get_num_runs, reverse=True)
+ for prefix, (o, run_ids) in p_dict_sorted:
+ if run_ids and used_run_ids.isdisjoint(run_ids):
selected_prefixes.add(prefix)
+ used_run_ids.update(run_ids)
+
# Generate check lines in alphabetical order.
check_lines = []
for prefix in sorted(selected_prefixes):