Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll | 216
-rw-r--r--  llvm/test/Analysis/DependenceAnalysis/GCD.ll | 6
-rw-r--r--  llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll | 4
-rw-r--r--  llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll | 2
-rw-r--r--  llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll | 63
-rw-r--r--  llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll | 68
-rw-r--r--  llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll | 137
-rw-r--r--  llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll | 125
-rw-r--r--  llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll | 122
-rw-r--r--  llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll | 81
-rw-r--r--  llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll | 17
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir | 47
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-srl-and.ll | 42
-rw-r--r--  llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll | 414
-rw-r--r--  llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll | 424
-rw-r--r--  llvm/test/CodeGen/AArch64/llvm.sincos.ll | 195
-rw-r--r--  llvm/test/CodeGen/AArch64/signbit-test.ll | 22
-rw-r--r--  llvm/test/CodeGen/AArch64/signed-truncation-check.ll | 434
-rw-r--r--  llvm/test/CodeGen/AArch64/stackmap.ll | 4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll | 612
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll | 165
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll | 66
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir | 37
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll | 199
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll | 63
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll | 67
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir | 524
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir | 19
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir | 24
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir | 8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir | 479
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir | 8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll | 535
-rw-r--r--  llvm/test/CodeGen/AMDGPU/amdgpu-miscellaneous-uniform-intrinsic.ll | 173
-rw-r--r--  llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll | 1
-rw-r--r--  llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll | 1
-rw-r--r--  llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll | 1
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll | 18
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 23
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll | 79
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll | 70
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll | 75
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll | 77
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll | 48
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll | 49
-rw-r--r--  llvm/test/CodeGen/AMDGPU/promote-alloca-array-to-vector.ll | 325
-rw-r--r--  llvm/test/CodeGen/AMDGPU/readcyclecounter.ll | 12
-rw-r--r--  llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll | 6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll | 23
-rw-r--r--  llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll | 198
-rw-r--r--  llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll | 236
-rw-r--r--  llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir | 4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir | 142
-rw-r--r--  llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll | 2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/wait-xcnt.mir | 176
-rw-r--r--  llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll | 36
-rw-r--r--  llvm/test/CodeGen/AMDGPU/wqm.ll | 18
-rw-r--r--  llvm/test/CodeGen/ARM/fp-intrinsics.ll | 169
-rw-r--r--  llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 968
-rw-r--r--  llvm/test/CodeGen/ARM/strict-fp-func.ll | 13
-rw-r--r--  llvm/test/CodeGen/ARM/strict-fp-int-promote.ll | 159
-rw-r--r--  llvm/test/CodeGen/ARM/strict-fp-ops.ll | 202
-rw-r--r--  llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll | 270
-rw-r--r--  llvm/test/CodeGen/Hexagon/isel-fclass.ll | 86
-rw-r--r--  llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll | 18
-rw-r--r--  llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll | 8
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll | 63
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll | 132
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll | 321
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll | 379
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll | 258
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll | 63
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll | 132
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll | 321
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll | 379
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll | 156
-rw-r--r--  llvm/test/CodeGen/LoongArch/sink-fold-addi.ll | 758
-rw-r--r--  llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt | 22
-rw-r--r--  llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt | 22
-rw-r--r--  llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll | 2
-rw-r--r--  llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll | 4
-rw-r--r--  llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll | 4
-rw-r--r--  llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll | 4
-rw-r--r--  llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll | 4
-rw-r--r--  llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll | 4
-rw-r--r--  llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll | 68
-rw-r--r--  llvm/test/CodeGen/NVPTX/f16-ex2.ll | 40
-rw-r--r--  llvm/test/CodeGen/NVPTX/f32-ex2.ll | 7
-rw-r--r--  llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll | 100
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll | 1575
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll | 139
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll | 1724
-rw-r--r--  llvm/test/CodeGen/RISCV/features-info.ll | 1
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64-stackmap.ll | 118
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64p.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll | 253
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/pr165232.ll | 244
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll | 703
-rw-r--r--  llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll | 4
-rw-r--r--  llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll | 22
-rw-r--r--  llvm/test/CodeGen/SystemZ/stackmap.ll | 4
-rw-r--r--  llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll | 70
-rw-r--r--  llvm/test/CodeGen/X86/amx-tf32-internal.ll | 7
-rw-r--r--  llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll | 12
-rwxr-xr-x  llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll | 122
-rw-r--r--  llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll | 136
-rw-r--r--  llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir | 165
-rw-r--r--  llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir | 153
-rw-r--r--  llvm/test/CodeGen/X86/amx_tile_pair_copy.mir | 97
-rw-r--r--  llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll | 87
-rw-r--r--  llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll | 61
-rw-r--r--  llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir | 134
-rw-r--r--  llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir | 113
-rw-r--r--  llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll | 371
-rw-r--r--  llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll | 39
-rw-r--r--  llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll | 14
-rw-r--r--  llvm/test/CodeGen/X86/bittest-big-integer.ll | 276
-rw-r--r--  llvm/test/CodeGen/X86/ipra-reg-usage.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/ldexp-avx512.ll | 467
-rw-r--r--  llvm/test/CodeGen/X86/pr165755.ll | 26
-rw-r--r--  llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll | 84
-rw-r--r--  llvm/test/DebugInfo/Generic/objc-property.ll | 26
-rwxr-xr-x  llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test | 13
-rw-r--r--  llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll | 68
-rw-r--r--  llvm/test/Instrumentation/TypeSanitizer/basic_verify_outlined.ll | 736
-rw-r--r--  llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll | 24
-rw-r--r--  llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s | 16
-rw-r--r--  llvm/test/MC/AMDGPU/gfx12_asm_vop1.s | 4
-rw-r--r--  llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt | 32
-rwxr-xr-x  llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt | 128
-rw-r--r--  llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt | 8
-rw-r--r--  llvm/test/MC/Disassembler/X86/amx-transpose-att.txt | 154
-rwxr-xr-x  llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s | 128
-rwxr-xr-x  llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s | 128
-rw-r--r--  llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s | 7
-rw-r--r--  llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s | 7
-rw-r--r--  llvm/test/MC/X86/amx-transpose-att.s | 153
-rw-r--r--  llvm/test/MC/X86/amx-transpose-intel.s | 153
-rw-r--r--  llvm/test/TableGen/x86-instr-mapping.inc | 8
-rw-r--r--  llvm/test/Transforms/FixIrreducible/bug45623.ll | 109
-rw-r--r--  llvm/test/Transforms/FixIrreducible/callbr.ll | 869
-rw-r--r--  llvm/test/Transforms/FixIrreducible/nested.ll | 676
-rw-r--r--  llvm/test/Transforms/FixIrreducible/unreachable.ll | 23
-rw-r--r--  llvm/test/Transforms/GVN/assume-equal.ll | 44
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll | 2
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll | 22
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll | 4
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll | 2
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/exit-count-select.ll | 14
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll | 6
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/pr116483.ll | 8
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/pr24783.ll | 2
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/pr39673.ll | 2
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/pr63763.ll | 6
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll | 21
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll | 8
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll | 14
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll | 4
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/sentinel.ll | 14
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll | 32
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/sink-trapping.ll | 19
-rw-r--r--  llvm/test/Transforms/IndVarSimplify/zext-nuw.ll | 2
-rw-r--r--  llvm/test/Transforms/InstCombine/vec_extract_var_elt-inseltpoison.ll | 26
-rw-r--r--  llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll | 73
-rw-r--r--  llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll | 104
-rw-r--r--  llvm/test/Transforms/LICM/scalar-promote.ll | 6
-rw-r--r--  llvm/test/Transforms/LICM/sink-alloca.ll (renamed from llvm/test/Transforms/IndVarSimplify/sink-alloca.ll) | 6
-rw-r--r--  llvm/test/Transforms/LICM/sink-from-preheader.ll | 185
-rw-r--r--  llvm/test/Transforms/LICM/sink-trapping.ll | 28
-rw-r--r--  llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll | 2
-rw-r--r--  llvm/test/Transforms/LoopDistribute/laa-invalidation.ll | 2
-rw-r--r--  llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll | 46
-rw-r--r--  llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll | 144
-rw-r--r--  llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll (renamed from llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll) | 0
-rw-r--r--  llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll | 160
-rw-r--r--  llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll | 68
-rw-r--r--  llvm/test/Transforms/LoopUnroll/followup.ll | 35
-rw-r--r--  llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll | 4
-rw-r--r--  llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll | 58
-rw-r--r--  llvm/test/Transforms/LoopUnroll/runtime-loop.ll | 9
-rw-r--r--  llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll | 66
-rw-r--r--  llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll | 108
-rw-r--r--  llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll | 39
-rw-r--r--  llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll | 2
-rw-r--r--  llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll | 8
-rw-r--r--  llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll | 488
-rw-r--r--  llvm/test/Transforms/MemCpyOpt/stack-move.ll | 58
-rw-r--r--  llvm/test/Transforms/PGOProfile/memprof_diff_inline.ll | 118
-rw-r--r--  llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll | 46
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll | 2
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll | 4
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll | 2
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll | 20
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll | 6
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll | 49
-rw-r--r--  llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll | 34
-rw-r--r--  llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll | 36
-rw-r--r--  llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll | 89
-rw-r--r--  llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll | 11
-rw-r--r--  llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll | 157
-rw-r--r--  llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll | 27
-rw-r--r--  llvm/test/Transforms/SimplifyCFG/pr165301.ll | 31
-rw-r--r--  llvm/test/Transforms/UnifyLoopExits/basic.ll | 131
-rw-r--r--  llvm/test/Transforms/UnifyLoopExits/integer_guards.ll | 410
-rw-r--r--  llvm/test/Transforms/UnifyLoopExits/nested.ll | 142
-rw-r--r--  llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll | 236
-rw-r--r--  llvm/test/Transforms/UnifyLoopExits/undef-phis.ll | 68
-rw-r--r--  llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll | 36
-rw-r--r--  llvm/test/Transforms/Util/PredicateInfo/condprop.ll | 103
-rw-r--r--  llvm/test/Transforms/Util/PredicateInfo/diamond.ll | 20
-rw-r--r--  llvm/test/Transforms/Util/PredicateInfo/edge.ll | 67
-rw-r--r--  llvm/test/Transforms/Util/PredicateInfo/pr33456.ll | 19
-rw-r--r--  llvm/test/Transforms/Util/PredicateInfo/pr33457.ll | 27
-rw-r--r--  llvm/test/Transforms/Util/PredicateInfo/testandor.ll | 354
-rw-r--r--  llvm/test/lit.cfg.py | 15
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll | 29
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll.expected | 57
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected | 36
-rw-r--r--  llvm/test/tools/UpdateTestChecks/update_test_checks/check_empty.test | 3
-rw-r--r--  llvm/test/tools/llvm-config/paths.test | 16
-rw-r--r--  llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s | 126
-rw-r--r--  llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt | 52
-rw-r--r--  llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt | 11444
-rw-r--r--  llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test | 37
-rw-r--r--  llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test | 17
-rw-r--r--  llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml | 49
-rw-r--r--  llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml | 25
-rw-r--r--  llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml | 4
230 files changed, 28070 insertions, 10913 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll b/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll
new file mode 100644
index 0000000..9efcf91
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=NEON
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=SVE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sme -force-streaming | FileCheck %s --check-prefix=SME-STREAMING
+
+define void @extractions() {
+; NEON-LABEL: 'extractions'
+; NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SVE-LABEL: 'extractions'
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; SVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SME-STREAMING-LABEL: 'extractions'
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+
+ ;; Legal types
+ %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+ %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+ %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+ %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+ %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+ %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+ %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+ %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+ %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+ %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+ %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+ %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+ %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+ %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+ %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+ %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+
+ ;; Wider-than-legal
+ %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+ %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+ %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+ %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+ %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+ %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+ %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+ %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+ %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+ %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+ %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+ %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+ %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+ %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+ %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+ %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+
+ ;; Narrower-than-legal
+ %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+ %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+ %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+ %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+ %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+ %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+ %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+ %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+ %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+ %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+ %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+ %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+ %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+ %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+ %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+ %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+
+ ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index 03343e7..cb14d18 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
@@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
-; CHECK-NEXT: da analyze - flow [<> *]!
+; CHECK-NEXT: da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
@@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %conv, ptr %arrayidx5, align 4
; CHECK-NEXT: da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index cdfaec7..73a415b 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - flow [*|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
@@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4
-; CHECK-NEXT: da analyze - flow [<>]!
+; CHECK-NEXT: da analyze - flow [*|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %1 = load i32, ptr %arrayidx6, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
index 64fad37..783150a 100644
--- a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
@@ -18,7 +18,7 @@ define void @unknown_sign(ptr %a, i64 %k) {
; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.0, align 1
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 2, ptr %idx.1, align 1
-; CHECK-NEXT: da analyze - output [<>]!
+; CHECK-NEXT: da analyze - output [*|<]!
; CHECK-NEXT: Src: store i8 2, ptr %idx.1, align 1 --> Dst: store i8 2, ptr %idx.1, align 1
; CHECK-NEXT: da analyze - none!
;
diff --git a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll
new file mode 100644
index 0000000..9169ac3
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=gcd-miv 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-GCD-MIV
+
+; offset0 = 4;
+; offset1 = 0;
+; for (i = 0; i < 100; i++) {
+; A[offset0] = 1;
+; A[offset1] = 2;
+; offset0 += 3*m;
+; offset1 += 3;
+; }
+;
+; A dependency exists between the two stores. E.g., suppose `m` is
+; 12297829382473034411, which is the modular multiplicative inverse of 3
+; modulo 2^64. Then `offset0` is effectively `i + 4`, so the accesses are as
+; follows:
+;
+; - A[offset0] : A[4], A[5], A[6], ...
+; - A[offset1] : A[0], A[3], A[6], ...
+;
+define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) {
+; CHECK-ALL-LABEL: 'gcdmiv_coef_ovfl'
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - output [*|<]!
+; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-GCD-MIV-LABEL: 'gcdmiv_coef_ovfl'
+; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]!
+; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*|<]!
+; CHECK-GCD-MIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]!
+;
+entry:
+ %step = mul i64 3, %m
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ]
+ %offset.0 = phi i64 [ 4, %entry ] , [ %offset.0.next, %loop ]
+ %offset.1 = phi i64 [ 0, %entry ] , [ %offset.1.next, %loop ]
+ %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0
+ %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1
+ store i8 1, ptr %gep.0
+ store i8 2, ptr %gep.1
+ %i.inc = add nuw nsw i64 %i, 1
+ %offset.0.next = add nsw i64 %offset.0, %step
+ %offset.1.next = add nsw i64 %offset.1, 3
+ %ec = icmp eq i64 %i.inc, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
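The comment in gcd-miv-overflow.ll hinges on the step `3*m` wrapping around: with `m` equal to 12297829382473034411 (the value taken from the test comment), the product is 1 modulo 2^64, so `offset0` advances by one element per iteration. The following standalone C sketch, not part of the patch, verifies the wrap-around and the resulting collision of the two access streams:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint64_t m = 12297829382473034411ULL; /* multiplicative inverse of 3 mod 2^64 */
  uint64_t step = 3 * m;                /* unsigned wrap-around yields 1 */
  printf("3*m mod 2^64 = %llu\n", (unsigned long long)step);

  uint64_t offset0 = 4, offset1 = 0;    /* initial values of the two PHIs */
  for (uint64_t i = 0; i < 100; i++) {
    if (offset0 == offset1) {           /* first hit: A[6] at i == 2 */
      printf("both stores write A[%llu] at i = %llu\n",
             (unsigned long long)offset0, (unsigned long long)i);
      break;
    }
    offset0 += step;
    offset1 += 3;
  }
  return 0;
}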
diff --git a/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll
new file mode 100644
index 0000000..bf0fafc
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=strong-siv 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-STRONG-SIV
+
+; for (i = 0; i < (1LL << 62); i++) {
+; if (0 <= 2*i - 2)
+; A[2*i - 2] = 1;
+;
+; if (0 <= 2*i - 4)
+; A[2*i - 4] = 2;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but one does exist. For example, the first store accesses A[0] when
+; i is 1 and the second does when i is 2.
+; The root cause is that the product of the BTC and the coefficient
+; ((1LL << 62) - 1 and 2) overflows in a signed sense.
+define void @strongsiv_const_ovfl(ptr %A) {
+; CHECK-LABEL: 'strongsiv_const_ovfl'
+; CHECK-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-NEXT: da analyze - none!
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+ %offset.0 = phi i64 [ -2, %entry ], [ %offset.0.next, %loop.latch ]
+ %offset.1 = phi i64 [ -4, %entry ], [ %offset.1.next, %loop.latch ]
+ %ec = icmp eq i64 %i, 4611686018427387904
+ br i1 %ec, label %exit, label %loop.body
+
+loop.body:
+ %cond.0 = icmp sge i64 %offset.0, 0
+ %cond.1 = icmp sge i64 %offset.1, 0
+ br i1 %cond.0, label %if.then.0, label %loop.middle
+
+if.then.0:
+ %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0
+ store i8 1, ptr %gep.0
+ br label %loop.middle
+
+loop.middle:
+ br i1 %cond.1, label %if.then.1, label %loop.latch
+
+if.then.1:
+ %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1
+ store i8 2, ptr %gep.1
+ br label %loop.latch
+
+loop.latch:
+ %i.inc = add nuw nsw i64 %i, 1
+ %offset.0.next = add nsw i64 %offset.0, 2
+ %offset.1.next = add nsw i64 %offset.1, 2
+ br label %loop.header
+
+exit:
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-ALL: {{.*}}
+; CHECK-STRONG-SIV: {{.*}}
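For the strong-siv test above, the overlap claimed by the FIXME can be seen without running 2^62 iterations: the element written by `A[2*i - 2]` at iteration i is rewritten by `A[2*j - 4]` at j = i + 1. A minimal C sketch (illustration only, not part of the test) that checks this identity over a small range:

#include <assert.h>
#include <stdint.h>

int main(void) {
  /* A[2*i - 2] at iteration i and A[2*j - 4] at j = i + 1 name the same
     element, so the two stores are one iteration apart for every i >= 1. */
  for (int64_t i = 1; i < 1000; i++)
    assert(2 * i - 2 == 2 * (i + 1) - 4);
  return 0;
}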
diff --git a/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll
new file mode 100644
index 0000000..c5ff988
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=symbolic-rdiv 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-SYMBOLIC-RDIV
+
+; for (i = 0; i < (1LL << 62); i++) {
+; if (0 <= 2*i - 2)
+; A[2*i - 2] = 1;
+; A[i] = 2;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but one does exist. For example, the first store accesses A[0] when
+; i is 1 and the second does when i is 0.
+; The root cause is that the product of the BTC and the coefficient
+; ((1LL << 62) - 1 and 2) overflows in a signed sense.
+define void @symbolicrdiv_prod_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'symbolicrdiv_prod_ovfl'
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-SYMBOLIC-RDIV-LABEL: 'symbolicrdiv_prod_ovfl'
+; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - none!
+; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - none!
+; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - consistent output [*]!
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+ %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ]
+ %ec = icmp eq i64 %i, 4611686018427387904
+ br i1 %ec, label %exit, label %loop.body
+
+loop.body:
+ %cond = icmp sge i64 %offset, 0
+ br i1 %cond, label %if.then, label %loop.latch
+
+if.then:
+ %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset
+ store i8 1, ptr %gep.0
+ br label %loop.latch
+
+loop.latch:
+ %gep.1 = getelementptr inbounds i8, ptr %A, i64 %i
+ store i8 2, ptr %gep.1
+ %i.inc = add nuw nsw i64 %i, 1
+ %offset.next = add nsw i64 %offset, 2
+ br label %loop.header
+
+exit:
+ ret void
+}
+
+; offset0 = -4611686018427387904; // -2^62
+; offset1 = 4611686018427387904; // 2^62
+; for (i = 0; i < (1LL << 62) - 100; i++) {
+; if (0 <= offset0)
+; A[offset0] = 1;
+; if (0 <= offset1)
+; A[offset1] = 2;
+; offset0 += 2;
+; offset1 -= 1;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but one does exist. For example,
+;
+; memory access | i == 2^61 | i == 2^61 + 2^59 | i == 2^61 + 2^60
+; -------------------------|-----------|------------------|-------------------
+; A[2*i - 2^62] (offset0) | | A[2^60] | A[2^61]
+; A[-i + 2^62] (offset1) | A[2^61] | | A[2^60]
+;
+; The root cause is that the calculation of the difference between the two
+; constants (-2^62 and 2^62) overflows in a signed sense.
+define void @symbolicrdiv_delta_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'symbolicrdiv_delta_ovfl'
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-SYMBOLIC-RDIV-LABEL: 'symbolicrdiv_delta_ovfl'
+; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - consistent output [*]!
+; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - none!
+; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - consistent output [*]!
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+ %offset.0 = phi i64 [ -4611686018427387904, %entry ], [ %offset.0.next, %loop.latch ]
+ %offset.1 = phi i64 [ 4611686018427387904, %entry ], [ %offset.1.next, %loop.latch ]
+ %cond.0 = icmp sge i64 %offset.0, 0
+ %cond.1 = icmp sge i64 %offset.1, 0
+ br i1 %cond.0, label %if.then.0, label %loop.middle
+
+if.then.0:
+ %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0
+ store i8 1, ptr %gep.0
+ br label %loop.middle
+
+loop.middle:
+ br i1 %cond.1, label %if.then.1, label %loop.latch
+
+if.then.1:
+ %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1
+ store i8 2, ptr %gep.1
+ br label %loop.latch
+
+loop.latch:
+ %i.inc = add nuw nsw i64 %i, 1
+ %offset.0.next = add nsw i64 %offset.0, 2
+ %offset.1.next = sub nsw i64 %offset.1, 1
+ %ec = icmp eq i64 %i.inc, 4611686018427387804 ; 2^62 - 100
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
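The delta computation that symbolicrdiv_delta_ovfl exercises can be reproduced directly: the difference between the two starting offsets, 2^62 - (-2^62) = 2^63, is one past INT64_MAX. A small C sketch (not part of the test; it uses the GCC/Clang `__builtin_sub_overflow` extension) showing that the subtraction cannot be done in signed 64-bit arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  int64_t start0 = -4611686018427387904; /* -2^62, initial value of offset0 */
  int64_t start1 =  4611686018427387904; /*  2^62, initial value of offset1 */
  int64_t delta;
  /* 2^62 - (-2^62) = 2^63 does not fit in int64_t, so this reports overflow. */
  if (__builtin_sub_overflow(start1, start0, &delta))
    puts("delta overflows int64_t");
  return 0;
}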
diff --git a/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll
new file mode 100644
index 0000000..ba57c7b
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=weak-crossing-siv 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-WEAK-CROSSING-SIV
+
+; max_i = INT64_MAX/3 // 3074457345618258602
+; for (long long i = 0; i <= max_i; i++) {
+; A[-3*i + INT64_MAX] = 0;
+; if (i)
+; A[3*i - 2] = 1;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between
+; `A[-3*i + INT64_MAX]` and `A[3*i - 2]`, but one does exist. For example,
+;
+; memory access | i == 1 | i == max_i
+; ---------------------|------------------|------------------
+; A[-3*i + INT64_MAX] | A[INT64_MAX - 3] | A[1]
+; A[3*i - 2] | A[1] | A[INT64_MAX - 3]
+;
+; The root cause is that the calculation of the difference between the two
+; constants (INT64_MAX and -2) triggers an overflow.
+
+define void @weakcrossing_delta_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'weakcrossing_delta_ovfl'
+; CHECK-ALL-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-WEAK-CROSSING-SIV-LABEL: 'weakcrossing_delta_ovfl'
+; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - consistent output [*]!
+; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - none!
+; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - consistent output [*]!
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+ %subscript.0 = phi i64 [ 9223372036854775807, %entry ], [ %subscript.0.next, %loop.latch ]
+ %subscript.1 = phi i64 [ -2, %entry ], [ %subscript.1.next, %loop.latch ]
+ %idx.0 = getelementptr inbounds i8, ptr %A, i64 %subscript.0
+ store i8 0, ptr %idx.0
+ %cond.store = icmp ne i64 %i, 0
+ br i1 %cond.store, label %if.store, label %loop.latch
+
+if.store:
+ %idx.1 = getelementptr inbounds i8, ptr %A, i64 %subscript.1
+ store i8 1, ptr %idx.1
+ br label %loop.latch
+
+loop.latch:
+ %i.inc = add nuw nsw i64 %i, 1
+ %subscript.0.next = add nsw i64 %subscript.0, -3
+ %subscript.1.next = add nsw i64 %subscript.1, 3
+ %ec = icmp sgt i64 %i.inc, 3074457345618258602
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; max_i = INT64_MAX/3 // 3074457345618258602
+; for (long long i = 0; i <= max_i; i++) {
+; A[-3*i + INT64_MAX] = 0;
+; A[3*i + 1] = 1;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between
+; `A[-3*i + INT64_MAX]` and `A[3*i + 1]`, but one does exist. For example,
+;
+; memory access        | i == 0 | i == 1           | i == max_i - 1   | i == max_i
+; ---------------------|--------|------------------|------------------|-----------
+; A[-3*i + INT64_MAX]  |        | A[INT64_MAX - 3] |                  | A[1]
+; A[3*i + 1]           | A[1]   |                  | A[INT64_MAX - 3] |
+;
+; The root cause is that the product of the BTC, the coefficient, and 2
+; triggers an overflow.
+;
+define void @weakcrossing_prod_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'weakcrossing_prod_ovfl'
+; CHECK-ALL-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-WEAK-CROSSING-SIV-LABEL: 'weakcrossing_prod_ovfl'
+; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - consistent output [*]!
+; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - none!
+; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1
+; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - consistent output [*]!
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ]
+ %subscript.0 = phi i64 [ 9223372036854775807, %entry ], [ %subscript.0.next, %loop ]
+ %subscript.1 = phi i64 [ 1, %entry ], [ %subscript.1.next, %loop ]
+ %idx.0 = getelementptr inbounds i8, ptr %A, i64 %subscript.0
+ %idx.1 = getelementptr inbounds i8, ptr %A, i64 %subscript.1
+ store i8 0, ptr %idx.0
+ store i8 1, ptr %idx.1
+ %i.inc = add nuw nsw i64 %i, 1
+ %subscript.0.next = add nsw i64 %subscript.0, -3
+ %subscript.1.next = add nsw i64 %subscript.1, 3
+ %ec = icmp sgt i64 %i.inc, 3074457345618258602
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
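For the *_prod_ovfl test above, the quantity the comment points at is twice the product of the backedge-taken count (INT64_MAX/3) and the coefficient 3. A C sketch (not part of the test; it relies on the GCC/Clang `__builtin_mul_overflow` extension, and assumes BTC here means the backedge-taken count max_i) showing that this value exceeds the signed 64-bit range:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  int64_t btc   = 3074457345618258602;  /* INT64_MAX/3, backedge-taken count */
  int64_t coeff = 3;                    /* magnitude of both subscript coefficients */
  int64_t tmp, prod;
  /* btc * coeff is INT64_MAX - 1 and still fits; doubling it does not. */
  int ovfl = __builtin_mul_overflow(btc, coeff, &tmp) ||
             __builtin_mul_overflow(tmp, (int64_t)2, &prod);
  printf("2 * coeff * btc overflows int64_t: %s\n", ovfl ? "yes" : "no");
  return 0;
}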
diff --git a/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll
new file mode 100644
index 0000000..6317c38
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=weak-zero-siv 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-WEAK-ZERO-SIV
+
+; for (i = 0; i < (1LL << 62); i++) {
+; if (0 <= 2*i - 2)
+; A[2*i - 2] = 1;
+; A[2] = 2;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but one does exist. The root cause is that the product of the BTC and
+; the coefficient ((1LL << 62) - 1 and 2) overflows in a signed sense.
+;
+define void @weakzero_dst_siv_prod_ovfl(ptr %A) {
+; CHECK-ALL-LABEL: 'weakzero_dst_siv_prod_ovfl'
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - consistent output [S]!
+;
+; CHECK-WEAK-ZERO-SIV-LABEL: 'weakzero_dst_siv_prod_ovfl'
+; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - consistent output [*]!
+; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - none!
+; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - consistent output [S]!
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+ %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ]
+ %ec = icmp eq i64 %i, 4611686018427387904
+ br i1 %ec, label %exit, label %loop.body
+
+loop.body:
+ %cond = icmp sge i64 %offset, 0
+ br i1 %cond, label %if.then, label %loop.latch
+
+if.then:
+ %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset
+ store i8 1, ptr %gep.0
+ br label %loop.latch
+
+loop.latch:
+ %gep.1 = getelementptr inbounds i8, ptr %A, i64 2
+ store i8 2, ptr %gep.1
+ %i.inc = add nuw nsw i64 %i, 1
+ %offset.next = add nsw i64 %offset, 2
+ br label %loop.header
+
+exit:
+ ret void
+}
+
+; for (i = 0; i < n; i++) {
+; if (0 <= 2*i - 1)
+; A[2*i - 1] = 1;
+; A[INT64_MAX] = 2;
+; }
+;
+; FIXME: DependenceAnalysis currently detects no dependency between the two
+; stores, but one does exist. When `%n` is 2^62, the value of `%offset` will be
+; the same as INT64_MAX at the last iteration.
+; The root cause is that the calculation of the difference between the two
+; constants (INT64_MAX and -1) overflows in a signed sense.
+;
+define void @weakzero_dst_siv_delta_ovfl(ptr %A, i64 %n) {
+; CHECK-ALL-LABEL: 'weakzero_dst_siv_delta_ovfl'
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - consistent output [S]!
+;
+; CHECK-WEAK-ZERO-SIV-LABEL: 'weakzero_dst_siv_delta_ovfl'
+; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - consistent output [*]!
+; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - none!
+; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - consistent output [S]!
+;
+entry:
+ %guard = icmp sgt i64 %n, 0
+ br i1 %guard, label %loop.header, label %exit
+
+loop.header:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ]
+ %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ]
+ %ec = icmp eq i64 %i, %n
+ br i1 %ec, label %exit, label %loop.body
+
+loop.body:
+ %cond = icmp sge i64 %offset, 0
+ br i1 %cond, label %if.then, label %loop.latch
+
+if.then:
+ %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset
+ store i8 1, ptr %gep.0
+ br label %loop.latch
+
+loop.latch:
+ %gep.1 = getelementptr inbounds i8, ptr %A, i64 9223372036854775807
+ store i8 2, ptr %gep.1
+ %i.inc = add nuw nsw i64 %i, 1
+ %offset.next = add nsw i64 %offset, 2
+ br label %loop.header
+
+exit:
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
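The second FIXME in weak-zero-siv-overflow.ll points at the difference between the constants INT64_MAX and -1. Evaluated exactly, that difference is 2^63, which again falls outside the signed 64-bit range. A short C sketch (illustration only; `__int128` is a GCC/Clang extension, and the constants are the ones named in the comment) making the check explicit:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  int64_t dst_const = INT64_MAX; /* constant subscript of the A[INT64_MAX] store */
  int64_t src_const = -1;        /* starting value of the 2*i - 1 subscript */
  /* Do the subtraction in 128-bit arithmetic, then test whether the result
     still fits in a signed 64-bit integer (it does not: the value is 2^63). */
  __int128 delta = (__int128)dst_const - (__int128)src_const;
  printf("delta fits in int64_t: %s\n", delta <= (__int128)INT64_MAX ? "yes" : "no");
  return 0;
}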
diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
index 7dec2f8..78b4139 100644
--- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -1448,4 +1448,85 @@ exit: ; preds = %loop
ret i16 %crc.next
}
+define i16 @not.crc.data.next.outside.user(i16 %crc.init, i16 %data.init) {
+; CHECK-LABEL: 'not.crc.data.next.outside.user'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Recurrences have stray uses
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %data = phi i16 [ %data.init, %entry ], [ %data.next, %loop ]
+ %xor.crc.data = xor i16 %data, %crc
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 3
+ %check.sb = icmp slt i16 %xor.crc.data, 0
+ %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+ %data.next = shl i16 %data, 1
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit:
+ %ret = xor i16 %data.next, %crc.next
+ ret i16 %ret
+}
+
+define i16 @not.crc.data.phi.outside.user(i16 %crc.init, i16 %data.init) {
+; CHECK-LABEL: 'not.crc.data.phi.outside.user'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Recurrences have stray uses
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %data = phi i16 [ %data.init, %entry ], [ %data.next, %loop ]
+ %xor.crc.data = xor i16 %data, %crc
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 3
+ %check.sb = icmp slt i16 %xor.crc.data, 0
+ %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+ %data.next = shl i16 %data, 1
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit:
+ %ret = xor i16 %data, %crc.next
+ ret i16 %ret
+}
+
+define i16 @not.crc.crc.phi.outside.user(i16 %crc.init, i16 %data.init) {
+; CHECK-LABEL: 'not.crc.crc.phi.outside.user'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Recurrences have stray uses
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %data = phi i16 [ %data.init, %entry ], [ %data.next, %loop ]
+ %xor.crc.data = xor i16 %data, %crc
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 3
+ %check.sb = icmp slt i16 %xor.crc.data, 0
+ %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+ %data.next = shl i16 %data, 1
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit:
+ %ret = xor i16 %crc, %crc.next
+ ret i16 %ret
+}
+
declare i16 @side.effect()
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 362586a..4fc506f 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -87,6 +87,11 @@ declare void @llvm.nvvm.barrier(i32, i32)
declare void @llvm.nvvm.barrier.sync(i32)
declare void @llvm.nvvm.barrier.sync.cnt(i32, i32)
+declare float @llvm.nvvm.ex2.approx.f(float)
+declare double @llvm.nvvm.ex2.approx.d(double)
+declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)
+declare float @llvm.nvvm.ex2.approx.ftz.f(float)
+
; CHECK-LABEL: @simple_upgrade
define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
; CHECK: call i32 @llvm.bitreverse.i32(i32 %a)
@@ -355,3 +360,15 @@ define void @cta_barriers(i32 %x, i32 %y) {
call void @llvm.nvvm.barrier.sync.cnt(i32 %x, i32 %y)
ret void
}
+
+define void @nvvm_ex2_approx(float %a, double %b, half %c, <2 x half> %d) {
+; CHECK: call float @llvm.nvvm.ex2.approx.f32(float %a)
+; CHECK: call double @llvm.nvvm.ex2.approx.f64(double %b)
+; CHECK: call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %d)
+; CHECK: call float @llvm.nvvm.ex2.approx.ftz.f32(float %a)
+ %r1 = call float @llvm.nvvm.ex2.approx.f(float %a)
+ %r2 = call double @llvm.nvvm.ex2.approx.d(double %b)
+ %r3 = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %d)
+ %r4 = call float @llvm.nvvm.ex2.approx.ftz.f(float %a)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index 6b84a84..1950e60 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -1440,3 +1440,50 @@ body: |
%freeze:_(<4 x s32>) = G_FREEZE %extract
$q0 = COPY %freeze(<4 x s32>)
RET_ReallyLR implicit $x0
+...
+---
+name: ubfx_does_not_generate_poison
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: ubfx_does_not_generate_poison
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c1:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[FREEZE]], %c1(s64), %c1
+ ; CHECK-NEXT: $x0 = COPY [[UBFX]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 1
+ %1:_(s64) = G_UBFX %0, %c1, %c1
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: sbfx_does_not_generate_poison
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: sbfx_does_not_generate_poison
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c1:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[SBFX:%[0-9]+]]:_(s64) = G_SBFX [[FREEZE]], %c1(s64), %c1
+ ; CHECK-NEXT: $x0 = COPY [[SBFX]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 1
+ %1:_(s64) = G_SBFX %0, %c1, %c1
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
index b58f6ba..330f27b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
@@ -1,22 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -O3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -O3 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-linux-gnu -O3 -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; This used to miscompile:
; The 16-bit -1 should not become 32-bit -1 (sub w8, w8, #1).
@g = global i16 0, align 4
define i32 @srl_and() {
-; CHECK-LABEL: srl_and:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: adrp x8, :got:g
-; CHECK-NEXT: mov w9, #50
-; CHECK-NEXT: ldr x8, [x8, :got_lo12:g]
-; CHECK-NEXT: ldrh w8, [x8]
-; CHECK-NEXT: eor w8, w8, w9
-; CHECK-NEXT: mov w9, #65535
-; CHECK-NEXT: add w8, w8, w9
-; CHECK-NEXT: and w0, w8, w8, lsr #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: srl_and:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: adrp x8, :got:g
+; CHECK-SD-NEXT: mov w9, #50 // =0x32
+; CHECK-SD-NEXT: ldr x8, [x8, :got_lo12:g]
+; CHECK-SD-NEXT: ldrh w8, [x8]
+; CHECK-SD-NEXT: eor w8, w8, w9
+; CHECK-SD-NEXT: mov w9, #65535 // =0xffff
+; CHECK-SD-NEXT: add w8, w8, w9
+; CHECK-SD-NEXT: and w0, w8, w8, lsr #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: srl_and:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, :got:g
+; CHECK-GI-NEXT: mov w9, #50 // =0x32
+; CHECK-GI-NEXT: ldr x8, [x8, :got_lo12:g]
+; CHECK-GI-NEXT: ldrh w8, [x8]
+; CHECK-GI-NEXT: eor w8, w8, w9
+; CHECK-GI-NEXT: mov w9, #65535 // =0xffff
+; CHECK-GI-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-NEXT: and w9, w8, #0xffff
+; CHECK-GI-NEXT: cmp w8, w9
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: and w0, w9, w8
+; CHECK-GI-NEXT: ret
entry:
%0 = load i16, ptr @g, align 4
%1 = xor i16 %0, 50
@@ -29,3 +45,5 @@ entry:
ret i32 %and
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index c3fdc7d..8438f0b0 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; We are looking for the following pattern here:
; (X & (C l>> Y)) ==/!= 0
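The RUN lines above now also cover GlobalISel for a test whose subject is the (X & (C l>> Y)) ==/!= 0 pattern; the CHECK-SD output below shows the shift being moved onto X (lsl followed by tst against the constant mask). A standalone C sketch (not part of the test) that brute-forces the underlying equivalence for 8-bit values and in-range shift amounts:

#include <assert.h>

int main(void) {
  /* For logical shifts with y < 8, (x & (C >> y)) == 0 is the same predicate
     as ((x << y) & C) == 0 once the left shift is truncated to 8 bits, which
     is the rewrite the CHECK-SD lines rely on (lsl + tst #mask). */
  const unsigned C = 0x80; /* sign-bit mask from scalar_i8_signbit_eq */
  for (unsigned x = 0; x < 256; x++)
    for (unsigned y = 0; y < 8; y++)
      assert(((x & (C >> y)) == 0) == ((((x << y) & 0xffu) & C) == 0));
  return 0;
}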
@@ -13,12 +14,21 @@
; i8 scalar
define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_signbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0x80
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_signbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x80
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_signbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #128 // =0x80
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsr w8, w8, w9
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -26,12 +36,21 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
}
define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_lowestbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0x1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_lowestbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_lowestbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsr w8, w8, w9
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i8 1, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -39,12 +58,21 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
}
define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0x18
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_bitsinmiddle_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x18
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_bitsinmiddle_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #24 // =0x18
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsr w8, w8, w9
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i8 24, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -54,12 +82,21 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; i16 scalar
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
-; CHECK-LABEL: scalar_i16_signbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0x8000
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i16_signbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x8000
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i16_signbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NEXT: and w9, w1, #0xffff
+; CHECK-GI-NEXT: lsr w8, w8, w9
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i16 32768, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -67,12 +104,21 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
}
define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
-; CHECK-LABEL: scalar_i16_lowestbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0x1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i16_lowestbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i16_lowestbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: and w9, w1, #0xffff
+; CHECK-GI-NEXT: lsr w8, w8, w9
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i16 1, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -80,12 +126,21 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
}
define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
-; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0xff0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i16_bitsinmiddle_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0xff0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i16_bitsinmiddle_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #4080 // =0xff0
+; CHECK-GI-NEXT: and w9, w1, #0xffff
+; CHECK-GI-NEXT: lsr w8, w8, w9
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i16 4080, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -95,12 +150,20 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; i32 scalar
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: scalar_i32_signbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0x80000000
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i32_signbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x80000000
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i32_signbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-GI-NEXT: lsr w8, w8, w1
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i32 2147483648, %y
%t1 = and i32 %t0, %x
%res = icmp eq i32 %t1, 0
@@ -108,12 +171,20 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
}
define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: scalar_i32_lowestbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0x1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i32_lowestbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i32_lowestbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsr w8, w8, w1
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i32 1, %y
%t1 = and i32 %t0, %x
%res = icmp eq i32 %t1, 0
@@ -121,12 +192,20 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
}
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: scalar_i32_bitsinmiddle_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: tst w8, #0xffff00
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i32_bitsinmiddle_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0xffff00
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i32_bitsinmiddle_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #16776960 // =0xffff00
+; CHECK-GI-NEXT: lsr w8, w8, w1
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i32 16776960, %y
%t1 = and i32 %t0, %x
%res = icmp eq i32 %t1, 0
@@ -136,12 +215,20 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; i64 scalar
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scalar_i64_signbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl x8, x0, x1
-; CHECK-NEXT: tst x8, #0x8000000000000000
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i64_signbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl x8, x0, x1
+; CHECK-SD-NEXT: tst x8, #0x8000000000000000
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i64_signbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT: lsr x8, x8, x1
+; CHECK-GI-NEXT: tst x8, x0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i64 9223372036854775808, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -149,12 +236,20 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
}
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scalar_i64_lowestbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl x8, x0, x1
-; CHECK-NEXT: tst x8, #0x1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i64_lowestbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl x8, x0, x1
+; CHECK-SD-NEXT: tst x8, #0x1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i64_lowestbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsr x8, x8, x1
+; CHECK-GI-NEXT: tst x8, x0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i64 1, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -162,12 +257,20 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
}
define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scalar_i64_bitsinmiddle_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl x8, x0, x1
-; CHECK-NEXT: tst x8, #0xffffffff0000
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i64_bitsinmiddle_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl x8, x0, x1
+; CHECK-SD-NEXT: tst x8, #0xffffffff0000
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i64_bitsinmiddle_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #281474976645120 // =0xffffffff0000
+; CHECK-GI-NEXT: lsr x8, x8, x1
+; CHECK-GI-NEXT: tst x8, x0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i64 281474976645120, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -179,14 +282,24 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
;------------------------------------------------------------------------------;
define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_splat_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_4xi32_splat_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.4s, #1
+; CHECK-SD-NEXT: ushl v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_4xi32_splat_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.4s, #1
+; CHECK-GI-NEXT: neg v1.4s, v1.4s
+; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: ret
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -211,44 +324,86 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
}
define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.4s, #1
+; CHECK-SD-NEXT: ushl v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: neg v1.4s, v1.4s
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: mov v2.s[1], w8
+; CHECK-GI-NEXT: mov v2.s[3], w8
+; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: ret
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
ret <4 x i1> %res
}
define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: neg v1.4s, v1.4s
-; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.4s, #1
+; CHECK-SD-NEXT: neg v1.4s, v1.4s
+; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi d2, #0000000000000000
+; CHECK-GI-NEXT: movi v3.4s, #1
+; CHECK-GI-NEXT: neg v1.4s, v1.4s
+; CHECK-GI-NEXT: mov v2.s[1], wzr
+; CHECK-GI-NEXT: ushl v1.4s, v3.4s, v1.4s
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v2.s[3], wzr
+; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: ret
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
ret <4 x i1> %res
}
define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: neg v1.4s, v1.4s
-; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.4s, #1
+; CHECK-SD-NEXT: neg v1.4s, v1.4s
+; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: movi d2, #0000000000000000
+; CHECK-GI-NEXT: neg v1.4s, v1.4s
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: mov v3.s[1], w8
+; CHECK-GI-NEXT: mov v2.s[1], wzr
+; CHECK-GI-NEXT: mov v3.s[3], w8
+; CHECK-GI-NEXT: mov v2.s[3], wzr
+; CHECK-GI-NEXT: ushl v1.4s, v3.4s, v1.4s
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: ret
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
@@ -260,11 +415,20 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;------------------------------------------------------------------------------;
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_signbit_ne:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, w1
-; CHECK-NEXT: ubfx w0, w8, #7, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_signbit_ne:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl w8, w0, w1
+; CHECK-SD-NEXT: ubfx w0, w8, #7, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_signbit_ne:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #128 // =0x80
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsr w8, w8, w9
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, ne
+; CHECK-GI-NEXT: ret
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
@@ -315,14 +479,24 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
}
define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #128 // =0x80
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #128 // =0x80
+; CHECK-SD-NEXT: lsr w8, w8, w1
+; CHECK-SD-NEXT: and w8, w8, w0
+; CHECK-SD-NEXT: cmp w8, #1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #128 // =0x80
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsr w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: cmp w8, #1
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 1 ; should be comparing with 0
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 4a73b10..cc1bf27 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; We are looking for the following pattern here:
; (X & (C << Y)) ==/!= 0
@@ -13,13 +14,23 @@
; i8 scalar
define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_signbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, #0x80
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_signbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: lsr w8, w8, w1
+; CHECK-SD-NEXT: tst w8, #0x80
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_signbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: tst w8, #0xff
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -27,13 +38,23 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
}
define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_lowestbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, #0x1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_lowestbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: lsr w8, w8, w1
+; CHECK-SD-NEXT: tst w8, #0x1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_lowestbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: tst w8, #0xff
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i8 1, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -41,13 +62,23 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
}
define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, #0x18
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_bitsinmiddle_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: lsr w8, w8, w1
+; CHECK-SD-NEXT: tst w8, #0x18
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_bitsinmiddle_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #24 // =0x18
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: tst w8, #0xff
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i8 24, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -57,13 +88,23 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; i16 scalar
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
-; CHECK-LABEL: scalar_i16_signbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, #0x8000
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i16_signbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: lsr w8, w8, w1
+; CHECK-SD-NEXT: tst w8, #0x8000
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i16_signbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-32768 // =0xffff8000
+; CHECK-GI-NEXT: and w9, w1, #0xffff
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: tst w8, #0xffff
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i16 32768, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -71,13 +112,23 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
}
define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
-; CHECK-LABEL: scalar_i16_lowestbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, #0x1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i16_lowestbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: lsr w8, w8, w1
+; CHECK-SD-NEXT: tst w8, #0x1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i16_lowestbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: and w9, w1, #0xffff
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: tst w8, #0xffff
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i16 1, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -85,13 +136,23 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
}
define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
-; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, #0xff0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i16_bitsinmiddle_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: lsr w8, w8, w1
+; CHECK-SD-NEXT: tst w8, #0xff0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i16_bitsinmiddle_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #4080 // =0xff0
+; CHECK-GI-NEXT: and w9, w1, #0xffff
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: tst w8, #0xffff
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i16 4080, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -101,12 +162,20 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; i32 scalar
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: scalar_i32_signbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, w1
-; CHECK-NEXT: tst w8, #0x80000000
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i32_signbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x80000000
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i32_signbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-GI-NEXT: lsl w8, w8, w1
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i32 2147483648, %y
%t1 = and i32 %t0, %x
%res = icmp eq i32 %t1, 0
@@ -114,12 +183,20 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
}
define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: scalar_i32_lowestbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, w1
-; CHECK-NEXT: tst w8, #0x1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i32_lowestbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i32_lowestbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsl w8, w8, w1
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i32 1, %y
%t1 = and i32 %t0, %x
%res = icmp eq i32 %t1, 0
@@ -127,12 +204,20 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
}
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: scalar_i32_bitsinmiddle_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, w1
-; CHECK-NEXT: tst w8, #0xffff00
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i32_bitsinmiddle_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0xffff00
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i32_bitsinmiddle_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #16776960 // =0xffff00
+; CHECK-GI-NEXT: lsl w8, w8, w1
+; CHECK-GI-NEXT: tst w8, w0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i32 16776960, %y
%t1 = and i32 %t0, %x
%res = icmp eq i32 %t1, 0
@@ -142,12 +227,20 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; i64 scalar
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scalar_i64_signbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsr x8, x0, x1
-; CHECK-NEXT: tst x8, #0x8000000000000000
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i64_signbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr x8, x0, x1
+; CHECK-SD-NEXT: tst x8, #0x8000000000000000
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i64_signbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT: lsl x8, x8, x1
+; CHECK-GI-NEXT: tst x8, x0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i64 9223372036854775808, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -155,12 +248,20 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
}
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scalar_i64_lowestbit_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsr x8, x0, x1
-; CHECK-NEXT: tst x8, #0x1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i64_lowestbit_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr x8, x0, x1
+; CHECK-SD-NEXT: tst x8, #0x1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i64_lowestbit_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsl x8, x8, x1
+; CHECK-GI-NEXT: tst x8, x0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i64 1, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -168,12 +269,20 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
}
define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scalar_i64_bitsinmiddle_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: lsr x8, x0, x1
-; CHECK-NEXT: tst x8, #0xffffffff0000
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i64_bitsinmiddle_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr x8, x0, x1
+; CHECK-SD-NEXT: tst x8, #0xffffffff0000
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i64_bitsinmiddle_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #281474976645120 // =0xffffffff0000
+; CHECK-GI-NEXT: lsl x8, x8, x1
+; CHECK-GI-NEXT: tst x8, x0
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%t0 = shl i64 281474976645120, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -216,42 +325,81 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
}
define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.4s, #1
+; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: mov v2.s[1], w8
+; CHECK-GI-NEXT: mov v2.s[3], w8
+; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: ret
%t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
ret <4 x i1> %res
}
define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.4s, #1
+; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi d3, #0000000000000000
+; CHECK-GI-NEXT: movi v2.4s, #1
+; CHECK-GI-NEXT: mov v3.s[1], wzr
+; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v3.s[3], wzr
+; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v3.4s
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: ret
%t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
ret <4 x i1> %res
}
define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.4s, #1
+; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: movi d3, #0000000000000000
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: mov v2.s[1], w8
+; CHECK-GI-NEXT: mov v3.s[1], wzr
+; CHECK-GI-NEXT: mov v2.s[3], w8
+; CHECK-GI-NEXT: mov v3.s[3], wzr
+; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v3.4s
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: ret
%t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
@@ -263,12 +411,22 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;------------------------------------------------------------------------------;
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_signbit_ne:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: lsr w0, w8, #7
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_signbit_ne:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: lsr w8, w8, w1
+; CHECK-SD-NEXT: lsr w0, w8, #7
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_signbit_ne:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: tst w8, #0xff
+; CHECK-GI-NEXT: cset w0, ne
+; CHECK-GI-NEXT: ret
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
@@ -310,13 +468,24 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
}
define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_bitsinmiddle_slt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #24 // =0x18
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: ubfx w0, w8, #7, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_bitsinmiddle_slt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #24 // =0x18
+; CHECK-SD-NEXT: lsl w8, w8, w1
+; CHECK-SD-NEXT: and w8, w8, w0
+; CHECK-SD-NEXT: ubfx w0, w8, #7, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_bitsinmiddle_slt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #24 // =0x18
+; CHECK-GI-NEXT: and w9, w1, #0xff
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: and w8, w8, w0
+; CHECK-GI-NEXT: sxtb w8, w8
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: cset w0, mi
+; CHECK-GI-NEXT: ret
%t0 = shl i8 24, %y
%t1 = and i8 %t0, %x
%res = icmp slt i8 %t1, 0
@@ -324,15 +493,20 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
}
define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
-; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-128 // =0xffffff80
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: and w8, w8, #0x80
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT: lsl w8, w8, w1
+; CHECK-SD-NEXT: and w8, w8, w0
+; CHECK-SD-NEXT: and w8, w8, #0x80
+; CHECK-SD-NEXT: cmp w8, #1
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: ret
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 1 ; should be comparing with 0
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index f1dcb2a..21da864 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -215,6 +215,133 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) nounwind {
ret { <2 x half>, <2 x half> } %result
}
+define { <3 x half>, <3 x half> } @test_sincos_v3f16(<3 x half> %a) nounwind {
+; CHECK-LABEL: test_sincos_v3f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s2, s0, [sp, #32]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ldp s3, s1, [sp, #24]
+; CHECK-NEXT: fcvt h4, s0
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: fcvt h1, s3
+; CHECK-NEXT: ldp s5, s3, [sp, #40]
+; CHECK-NEXT: fcvt h3, s3
+; CHECK-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-NEXT: fcvt h4, s5
+; CHECK-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-NEXT: ldp s5, s2, [sp, #56]
+; CHECK-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h3, s5
+; CHECK-NEXT: mov v1.h[2], v4.h[0]
+; CHECK-NEXT: mov v0.h[3], v2.h[0]
+; CHECK-NEXT: mov v1.h[3], v3.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v3f16:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #80
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov h1, v0.h[1]
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: fcvt s8, h1
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: fcvt s9, h1
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[2]
+; NO-LIBCALL-NEXT: fcvt s10, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[3]
+; NO-LIBCALL-NEXT: fcvt s11, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s11
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[3], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s11
+; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: fcvt h2, s1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[3], v2.h[0]
+; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT: add sp, sp, #80
+; NO-LIBCALL-NEXT: ret
+ %result = call { <3 x half>, <3 x half> } @llvm.sincos.v3f16(<3 x half> %a)
+ ret { <3 x half>, <3 x half> } %result
+}
+
define { float, float } @test_sincos_f32(float %a) nounwind {
; CHECK-LABEL: test_sincos_f32:
; CHECK: // %bb.0:
@@ -493,3 +620,71 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) nounwi
%result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
ret { <2 x double>, <2 x double> } %result
}
+
+define { <3 x double>, <3 x double> } @test_sincos_v3f64(<3 x double> %a) nounwind {
+; CHECK-LABEL: test_sincos_v3f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: add x0, sp, #16
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: fmov d8, d2
+; CHECK-NEXT: fmov d9, d1
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: fmov d0, d9
+; CHECK-NEXT: add x0, sp, #32
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: fmov d0, d8
+; CHECK-NEXT: add x0, sp, #72
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldp d3, d0, [sp, #8]
+; CHECK-NEXT: ldr d2, [sp, #72]
+; CHECK-NEXT: ldp d4, d1, [sp, #24]
+; CHECK-NEXT: ldr d5, [sp, #40]
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v3f64:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: stp d13, d12, [sp, #-64]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov d10, d0
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov d8, d2
+; NO-LIBCALL-NEXT: fmov d9, d1
+; NO-LIBCALL-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: fmov d11, d0
+; NO-LIBCALL-NEXT: fmov d0, d9
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: fmov d12, d0
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: fmov d13, d0
+; NO-LIBCALL-NEXT: fmov d0, d10
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: fmov d10, d0
+; NO-LIBCALL-NEXT: fmov d0, d9
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: fmov d9, d0
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: fmov d5, d0
+; NO-LIBCALL-NEXT: fmov d0, d11
+; NO-LIBCALL-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov d3, d10
+; NO-LIBCALL-NEXT: fmov d4, d9
+; NO-LIBCALL-NEXT: fmov d1, d12
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov d2, d13
+; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d13, d12, [sp], #64 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
+ %result = call { <3 x double>, <3 x double> } @llvm.sincos.v3f64(<3 x double> %a)
+ ret { <3 x double>, <3 x double> } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/signbit-test.ll b/llvm/test/CodeGen/AArch64/signbit-test.ll
index c74a934..298495b 100644
--- a/llvm/test/CodeGen/AArch64/signbit-test.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-test.ll
@@ -1,13 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-- -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define i64 @test_clear_mask_i64_i32(i64 %x) nounwind {
-; CHECK-LABEL: test_clear_mask_i64_i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #42 // =0x2a
-; CHECK-NEXT: cmn w0, #1
-; CHECK-NEXT: csel x0, x8, x0, gt
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_clear_mask_i64_i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #42 // =0x2a
+; CHECK-SD-NEXT: cmn w0, #1
+; CHECK-SD-NEXT: csel x0, x8, x0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_clear_mask_i64_i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #42 // =0x2a
+; CHECK-GI-NEXT: tst x0, #0x80000000
+; CHECK-GI-NEXT: csel x0, x8, x0, eq
+; CHECK-GI-NEXT: ret
entry:
%a = and i64 %x, 2147483648
%r = icmp eq i64 %a, 0
diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
index 7c80f93..fc01c6b 100644
--- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; https://bugs.llvm.org/show_bug.cgi?id=38149
@@ -19,13 +20,22 @@
; ---------------------------------------------------------------------------- ;
define i1 @shifts_eqcmp_i16_i8(i16 %x) nounwind {
-; CHECK-LABEL: shifts_eqcmp_i16_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, w0, uxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shifts_eqcmp_i16_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sxtb w8, w0
+; CHECK-SD-NEXT: and w8, w8, #0xffff
+; CHECK-SD-NEXT: cmp w8, w0, uxth
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shifts_eqcmp_i16_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: lsl w8, w0, #8
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: and w8, w8, #0xffff
+; CHECK-GI-NEXT: cmp w8, w0, uxth
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
%tmp0 = shl i16 %x, 8 ; 16-8
%tmp1 = ashr exact i16 %tmp0, 8 ; 16-8
%tmp2 = icmp eq i16 %tmp1, %x
@@ -97,26 +107,43 @@ define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind {
; ---------------------------------------------------------------------------- ;
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
-; CHECK-LABEL: add_ugecmp_i16_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, #128
-; CHECK-NEXT: lsr w8, w8, #8
-; CHECK-NEXT: cmp w8, #254
-; CHECK-NEXT: cset w0, hi
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ugecmp_i16_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: sub w8, w8, #128
+; CHECK-SD-NEXT: lsr w8, w8, #8
+; CHECK-SD-NEXT: cmp w8, #254
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ugecmp_i16_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT: mov w9, #65280 // =0xff00
+; CHECK-GI-NEXT: add w8, w8, w0, uxth
+; CHECK-GI-NEXT: cmp w8, w9
+; CHECK-GI-NEXT: cset w0, hs
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8
ret i1 %tmp1
}
define i1 @add_ugecmp_i32_i16_i8(i16 %xx) nounwind {
-; CHECK-LABEL: add_ugecmp_i32_i16_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: cmp w8, w8, sxtb
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ugecmp_i32_i16_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: cmp w8, w8, sxtb
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ugecmp_i32_i16_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT: add w8, w8, w0, uxth
+; CHECK-GI-NEXT: cmn w8, #256
+; CHECK-GI-NEXT: cset w0, hs
+; CHECK-GI-NEXT: ret
%x = zext i16 %xx to i32
%tmp0 = add i32 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp uge i32 %tmp0, -256 ; ~0U << 8
@@ -124,55 +151,92 @@ define i1 @add_ugecmp_i32_i16_i8(i16 %xx) nounwind {
}
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
-; CHECK-LABEL: add_ugecmp_i32_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w0, sxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ugecmp_i32_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w0, sxth
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ugecmp_i32_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub w8, w0, #8, lsl #12 // =32768
+; CHECK-GI-NEXT: cmn w8, #16, lsl #12 // =65536
+; CHECK-GI-NEXT: cset w0, hs
+; CHECK-GI-NEXT: ret
%tmp0 = add i32 %x, -32768 ; ~0U << (16-1)
%tmp1 = icmp uge i32 %tmp0, -65536 ; ~0U << 16
ret i1 %tmp1
}
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
-; CHECK-LABEL: add_ugecmp_i32_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w0, sxtb
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ugecmp_i32_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w0, sxtb
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ugecmp_i32_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub w8, w0, #128
+; CHECK-GI-NEXT: cmn w8, #256
+; CHECK-GI-NEXT: cset w0, hs
+; CHECK-GI-NEXT: ret
%tmp0 = add i32 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp uge i32 %tmp0, -256 ; ~0U << 8
ret i1 %tmp1
}
define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
-; CHECK-LABEL: add_ugecmp_i64_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, w0, sxtw
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ugecmp_i64_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, w0, sxtw
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ugecmp_i64_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #-2147483648 // =0xffffffff80000000
+; CHECK-GI-NEXT: mov x9, #-4294967296 // =0xffffffff00000000
+; CHECK-GI-NEXT: add x8, x0, x8
+; CHECK-GI-NEXT: cmp x8, x9
+; CHECK-GI-NEXT: cset w0, hs
+; CHECK-GI-NEXT: ret
%tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1)
%tmp1 = icmp uge i64 %tmp0, -4294967296 ; ~0U << 32
ret i1 %tmp1
}
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
-; CHECK-LABEL: add_ugecmp_i64_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, w0, sxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ugecmp_i64_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, w0, sxth
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ugecmp_i64_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub x8, x0, #8, lsl #12 // =32768
+; CHECK-GI-NEXT: cmn x8, #16, lsl #12 // =65536
+; CHECK-GI-NEXT: cset w0, hs
+; CHECK-GI-NEXT: ret
%tmp0 = add i64 %x, -32768 ; ~0U << (16-1)
%tmp1 = icmp uge i64 %tmp0, -65536 ; ~0U << 16
ret i1 %tmp1
}
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
-; CHECK-LABEL: add_ugecmp_i64_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, w0, sxtb
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ugecmp_i64_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, w0, sxtb
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ugecmp_i64_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub x8, x0, #128
+; CHECK-GI-NEXT: cmn x8, #256
+; CHECK-GI-NEXT: cset w0, hs
+; CHECK-GI-NEXT: ret
%tmp0 = add i64 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp uge i64 %tmp0, -256 ; ~0U << 8
ret i1 %tmp1
@@ -180,14 +244,23 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; Slightly more canonical variant
define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
-; CHECK-LABEL: add_ugtcmp_i16_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, #128
-; CHECK-NEXT: lsr w8, w8, #8
-; CHECK-NEXT: cmp w8, #254
-; CHECK-NEXT: cset w0, hi
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ugtcmp_i16_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: sub w8, w8, #128
+; CHECK-SD-NEXT: lsr w8, w8, #8
+; CHECK-SD-NEXT: cmp w8, #254
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ugtcmp_i16_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT: mov w9, #65279 // =0xfeff
+; CHECK-GI-NEXT: add w8, w8, w0, uxth
+; CHECK-GI-NEXT: cmp w8, w9
+; CHECK-GI-NEXT: cset w0, hi
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp ugt i16 %tmp0, -257 ; ~0U << 8 - 1
ret i1 %tmp1
@@ -198,68 +271,113 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
; ---------------------------------------------------------------------------- ;
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_i16_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, w0, uxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_i16_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sxtb w8, w0
+; CHECK-SD-NEXT: and w8, w8, #0xffff
+; CHECK-SD-NEXT: cmp w8, w0, uxth
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_i16_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, #128
+; CHECK-GI-NEXT: and w8, w8, #0xffff
+; CHECK-GI-NEXT: cmp w8, #256
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
ret i1 %tmp1
}
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_i32_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w0, sxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_i32_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w0, sxth
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_i32_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, #8, lsl #12 // =32768
+; CHECK-GI-NEXT: cmp w8, #16, lsl #12 // =65536
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16
ret i1 %tmp1
}
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_i32_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w0, sxtb
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_i32_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w0, sxtb
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_i32_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, #128
+; CHECK-GI-NEXT: cmp w8, #256
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8
ret i1 %tmp1
}
define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_i64_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, w0, sxtw
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_i64_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, w0, sxtw
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_i64_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-GI-NEXT: mov x9, #4294967296 // =0x100000000
+; CHECK-GI-NEXT: add x8, x0, x8
+; CHECK-GI-NEXT: cmp x8, x9
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
%tmp1 = icmp ult i64 %tmp0, 4294967296 ; 1U << 32
ret i1 %tmp1
}
define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_i64_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, w0, sxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_i64_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, w0, sxth
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_i64_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add x8, x0, #8, lsl #12 // =32768
+; CHECK-GI-NEXT: cmp x8, #16, lsl #12 // =65536
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16
ret i1 %tmp1
}
define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_i64_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, w0, sxtb
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_i64_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, w0, sxtb
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_i64_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add x8, x0, #128
+; CHECK-GI-NEXT: cmp x8, #256
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8
ret i1 %tmp1
@@ -267,13 +385,21 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; Slightly more canonical variant
define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
-; CHECK-LABEL: add_ulecmp_i16_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, w0, uxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ulecmp_i16_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sxtb w8, w0
+; CHECK-SD-NEXT: and w8, w8, #0xffff
+; CHECK-SD-NEXT: cmp w8, w0, uxth
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ulecmp_i16_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, #128
+; CHECK-GI-NEXT: and w8, w8, #0xffff
+; CHECK-GI-NEXT: cmp w8, #255
+; CHECK-GI-NEXT: cset w0, ls
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ule i16 %tmp0, 255 ; (1U << 8) - 1
ret i1 %tmp1
@@ -284,12 +410,20 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
; Adding not a constant
define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind {
-; CHECK-LABEL: add_ultcmp_bad_i16_i8_add:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: tst w8, #0xff00
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_bad_i16_i8_add:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0xff00
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_bad_i16_i8_add:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, w1
+; CHECK-GI-NEXT: and w8, w8, #0xffff
+; CHECK-GI-NEXT: cmp w8, #256
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, %y
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
ret i1 %tmp1
@@ -311,12 +445,20 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
; Second constant is not larger than the first one
define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_bad_i8_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, #128
-; CHECK-NEXT: lsr w0, w8, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_bad_i8_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: add w8, w8, #128
+; CHECK-SD-NEXT: lsr w0, w8, #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_bad_i8_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xffff
+; CHECK-GI-NEXT: add w8, w8, #128
+; CHECK-GI-NEXT: cmp w8, w8, uxth
+; CHECK-GI-NEXT: cset w0, ne
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 128 ; 1U << (8-1)
ret i1 %tmp1
@@ -324,12 +466,20 @@ define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
; First constant is not power of two
define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #192
-; CHECK-NEXT: tst w8, #0xff00
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w8, w0, #192
+; CHECK-SD-NEXT: tst w8, #0xff00
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, #192
+; CHECK-GI-NEXT: and w8, w8, #0xffff
+; CHECK-GI-NEXT: cmp w8, #256
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
ret i1 %tmp1
@@ -351,12 +501,20 @@ define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
; Magic check fails, 64 << 1 != 256
define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_bad_i16_i8_magic:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #64
-; CHECK-NEXT: tst w8, #0xff00
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_bad_i16_i8_magic:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w8, w0, #64
+; CHECK-SD-NEXT: tst w8, #0xff00
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_bad_i16_i8_magic:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, #64
+; CHECK-GI-NEXT: and w8, w8, #0xffff
+; CHECK-GI-NEXT: cmp w8, #256
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
ret i1 %tmp1
@@ -364,12 +522,20 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
; Bad 'destination type'
define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_bad_i16_i4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #8
-; CHECK-NEXT: tst w8, #0xfff0
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_bad_i16_i4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w8, w0, #8
+; CHECK-SD-NEXT: tst w8, #0xfff0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_bad_i16_i4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, #8
+; CHECK-GI-NEXT: and w8, w8, #0xffff
+; CHECK-GI-NEXT: cmp w8, #16
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i16 %x, 8 ; 1U << (4-1)
%tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4
ret i1 %tmp1
@@ -377,12 +543,20 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
; Bad storage type
define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
-; CHECK-LABEL: add_ultcmp_bad_i24_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128
-; CHECK-NEXT: tst w8, #0xffff00
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: add_ultcmp_bad_i24_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w8, w0, #128
+; CHECK-SD-NEXT: tst w8, #0xffff00
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: add_ultcmp_bad_i24_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, #128
+; CHECK-GI-NEXT: and w8, w8, #0xffffff
+; CHECK-GI-NEXT: cmp w8, #256
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
%tmp0 = add i24 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8
ret i1 %tmp1
diff --git a/llvm/test/CodeGen/AArch64/stackmap.ll b/llvm/test/CodeGen/AArch64/stackmap.ll
index 995d254..26221d0 100644
--- a/llvm/test/CodeGen/AArch64/stackmap.ll
+++ b/llvm/test/CodeGen/AArch64/stackmap.ll
@@ -81,14 +81,14 @@
; CHECK-NEXT: .hword 8
; CHECK-NEXT: .hword 0
; CHECK-NEXT: .hword 0
-; CHECK-NEXT: .word 65535
+; CHECK-NEXT: .word -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .hword 8
; CHECK-NEXT: .hword 0
; CHECK-NEXT: .hword 0
-; CHECK-NEXT: .word 65535
+; CHECK-NEXT: .word -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll
new file mode 100644
index 0000000..e117200
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll
@@ -0,0 +1,612 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
+
+define i16 @s_add_i16(i16 inreg %a, i16 inreg %b) {
+; GFX7-LABEL: s_add_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s16
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s16
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s16
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @v_add_i16(i16 %a, i16 %b) {
+; GFX7-LABEL: v_add_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) {
+; GFX7-LABEL: s_add_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s16
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s16
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s16
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @v_add_i32(i32 %a, i32 %b) {
+; GFX7-LABEL: v_add_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define <2 x i16> @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GFX7-LABEL: s_add_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s18
+; GFX7-NEXT: s_add_i32 s17, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: v_mov_b32_e32 v1, s17
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_lshr_b32 s4, s16, 16
+; GFX9-NEXT: s_lshr_b32 s5, s17, 16
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: s_add_i32 s4, s4, s5
+; GFX9-NEXT: s_pack_ll_b32_b16 s4, s16, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_lshr_b32 s4, s16, 16
+; GFX8-NEXT: s_lshr_b32 s5, s17, 16
+; GFX8-NEXT: s_add_i32 s4, s4, s5
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_and_b32 s5, 0xffff, s16
+; GFX8-NEXT: s_lshl_b32 s4, s4, 16
+; GFX8-NEXT: s_or_b32 s4, s5, s4
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_lshr_b32 s4, s16, 16
+; GFX10-NEXT: s_lshr_b32 s5, s17, 16
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: s_add_i32 s4, s4, s5
+; GFX10-NEXT: s_pack_ll_b32_b16 s4, s16, s4
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_lshr_b32 s2, s0, 16
+; GFX11-NEXT: s_lshr_b32 s3, s1, 16
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: s_add_i32 s2, s2, s3
+; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_lshr_b32 s2, s0, 16
+; GFX12-NEXT: s_lshr_b32 s3, s1, 16
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_add_co_i32 s2, s2, s3
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s2
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; GFX7-LABEL: v_add_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) {
+; GFX7-LABEL: s_add_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_u32 s4, s16, s18
+; GFX7-NEXT: s_addc_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_u32 s4, s16, s18
+; GFX9-NEXT: s_addc_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_u32 s4, s16, s18
+; GFX8-NEXT: s_addc_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s4, s16, s18
+; GFX10-NEXT: s_addc_u32 s5, s17, s19
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_u32 s0, s0, s2
+; GFX11-NEXT: s_addc_u32 s1, s1, s3
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+define i64 @v_add_i64(i64 %a, i64 %b) {
+; GFX7-LABEL: v_add_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+define void @s_uaddo_uadde(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: s_uaddo_uadde:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_u32 s4, s16, s18
+; GFX7-NEXT: s_addc_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v4, s4
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_cselect_b32 s8, 1, 0
+; GFX7-NEXT: v_mov_b32_e32 v5, s5
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX7-NEXT: v_mov_b32_e32 v0, s8
+; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_uaddo_uadde:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_u32 s4, s16, s18
+; GFX9-NEXT: s_addc_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX9-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_uaddo_uadde:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_u32 s4, s16, s18
+; GFX8-NEXT: s_addc_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v4, s4
+; GFX8-NEXT: s_cselect_b32 s6, 1, 0
+; GFX8-NEXT: v_mov_b32_e32 v5, s5
+; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v0, s6
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_uaddo_uadde:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s4, s16, s18
+; GFX10-NEXT: s_addc_u32 s5, s17, s19
+; GFX10-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: v_mov_b32_e32 v5, s5
+; GFX10-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX10-NEXT: global_store_dword v[2:3], v6, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_uaddo_uadde:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_u32 s0, s0, s2
+; GFX11-NEXT: s_addc_u32 s1, s1, s3
+; GFX11-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX11-NEXT: v_mov_b32_e32 v6, s2
+; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX11-NEXT: global_store_b32 v[2:3], v6, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_uaddo_uadde:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_u32 s0, s0, s2
+; GFX12-NEXT: s_add_co_ci_u32 s1, s1, s3
+; GFX12-NEXT: s_cselect_b32 s2, 1, 0
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX12-NEXT: v_mov_b32_e32 v6, s2
+; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX12-NEXT: global_store_b32 v[2:3], v6, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %add = extractvalue {i64, i1} %uaddo, 0
+ %of = extractvalue {i64, i1} %uaddo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %add, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+define void @v_uaddo_uadde(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: v_uaddo_uadde:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
+; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_uaddo_uadde:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX9-NEXT: global_store_dword v[6:7], v2, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_uaddo_uadde:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
+; GFX8-NEXT: flat_store_dword v[6:7], v2
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddo_uadde:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX10-NEXT: global_store_dword v[6:7], v2, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_uaddo_uadde:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT: global_store_b32 v[6:7], v2, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_uaddo_uadde:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: global_store_b32 v[6:7], v2, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %add = extractvalue {i64, i1} %uaddo, 0
+ %of = extractvalue {i64, i1} %uaddo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %add, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
new file mode 100644
index 0000000..e440bee
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
+
+define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
+; GFX11-FAKE16-LABEL: fadd_s16_uniform:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fadd_s16_uniform:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s16_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_add_f16 s0, s0, s1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
+ %fadd = fadd half %a, %b
+ ret half %fadd
+}
+
+define amdgpu_ps half @fadd_s16_div(half %a, half %b) {
+; GFX11-FAKE16-LABEL: fadd_s16_div:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fadd_s16_div:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: fadd_s16_div:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: fadd_s16_div:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+ %fadd = fadd half %a, %b
+ ret half %fadd
+}
+
+define amdgpu_ps float @fadd_s32_uniform(float inreg %a, float inreg %b) {
+; GFX11-LABEL: fadd_s32_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, s0, s1
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s32_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_add_f32 s0, s0, s1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
+ %fadd = fadd float %a, %b
+ ret float %fadd
+}
+
+define amdgpu_ps float @fadd_s32_div(float %a, float %b) {
+; GCN-LABEL: fadd_s32_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-NEXT: ; return to shader part epilog
+ %fadd = fadd float %a, %b
+ ret float %fadd
+}
+
+define amdgpu_ps void @fadd_s64_uniform(double inreg %a, double inreg %b, ptr addrspace(1) %ptr) {
+; GFX11-LABEL: fadd_s64_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[2:3], s[0:1], s[2:3]
+; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fadd_s64_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_add_f64_e64 v[2:3], s[0:1], s[2:3]
+; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT: s_endpgm
+ %fadd = fadd double %a, %b
+ store double %fadd, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_ps void @fadd_s64_div(double %a, double %b, ptr addrspace(1) %ptr) {
+; GFX11-LABEL: fadd_s64_div:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fadd_s64_div:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_add_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: s_endpgm
+ %fadd = fadd double %a, %b
+ store double %fadd, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_ps <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
+; GFX11-LABEL: fadd_v2s16_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_pk_add_f16 v0, s0, s1
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_v2s16_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_lshr_b32 s2, s0, 16
+; GFX12-NEXT: s_lshr_b32 s3, s1, 16
+; GFX12-NEXT: s_add_f16 s0, s0, s1
+; GFX12-NEXT: s_add_f16 s1, s2, s3
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
+ %fadd = fadd <2 x half> %a, %b
+ ret <2 x half> %fadd
+}
+
+define amdgpu_ps <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
+; GCN-LABEL: fadd_v2s16_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_pk_add_f16 v0, v0, v1
+; GCN-NEXT: ; return to shader part epilog
+ %fadd = fadd <2 x half> %a, %b
+ ret <2 x half> %fadd
+}
+
+define amdgpu_ps <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
+; GFX11-LABEL: fadd_v2s32_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, s0, s2
+; GFX11-NEXT: v_add_f32_e64 v1, s1, s3
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_v2s32_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_add_f32 s0, s0, s2
+; GFX12-NEXT: s_add_f32 s1, s1, s3
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: ; return to shader part epilog
+ %fadd = fadd <2 x float> %a, %b
+ ret <2 x float> %fadd
+}
+
+define amdgpu_ps <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
+; GCN-LABEL: fadd_v2s32_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
+; GCN-NEXT: ; return to shader part epilog
+ %fadd = fadd <2 x float> %a, %b
+ ret <2 x float> %fadd
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
new file mode 100644
index 0000000..588802c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
+
+define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) {
+; GFX7-LABEL: fcmp_uniform_select:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9
+; GFX7-NEXT: s_load_dword s3, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_f32_e64 s[4:5], s6, 0
+; GFX7-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
+; GFX7-NEXT: s_cselect_b32 s4, 1, 0
+; GFX7-NEXT: s_and_b32 s4, s4, 1
+; GFX7-NEXT: s_cmp_lg_u32 s4, 0
+; GFX7-NEXT: s_cselect_b32 s3, s7, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: fcmp_uniform_select:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0
+; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8-NEXT: s_cselect_b32 s0, 1, 0
+; GFX8-NEXT: s_and_b32 s0, s0, 1
+; GFX8-NEXT: s_cmp_lg_u32 s0, 0
+; GFX8-NEXT: s_cselect_b32 s0, s1, s6
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: flat_store_dword v[0:1], v2
+; GFX8-NEXT: s_endpgm
+;
+; GFX11-LABEL: fcmp_uniform_select:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c
+; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0
+; GFX11-NEXT: s_cmp_lg_u32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, 1, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s0, s0, 1
+; GFX11-NEXT: s_cmp_lg_u32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, s1, s6
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_endpgm
+ %cmp = fcmp oeq float %a, 0.0
+ %sel = select i1 %cmp, i32 %b, i32 %c
+ store i32 %sel, ptr addrspace(1) %out
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
new file mode 100644
index 0000000..b6652f6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select %s -o - | FileCheck -check-prefixes=GFX7 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select %s -o - | FileCheck -check-prefixes=GFX8 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select %s -o - | FileCheck -check-prefixes=GFX11 %s
+
+---
+name: test_copy_scc_vcc
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: test_copy_scc_vcc
+ ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX7-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0
+ ;
+    ; GFX8-LABEL: name: test_copy_scc_vcc
+    ; GFX8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; GFX8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GFX8-NEXT: $sgpr0 = COPY [[COPY]]
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr0
+ ;
+ ; GFX11-LABEL: name: test_copy_scc_vcc
+ ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0
+ %0:vcc(s1) = G_IMPLICIT_DEF
+ %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0
+ $sgpr0 = COPY %1
+ S_ENDPGM 0, implicit $sgpr0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 02d0e52..6facdfd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -104,109 +104,110 @@ define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
ret <4 x i32> %res
}
-define amdgpu_cs i16 @abs_vgpr_i16(i16 %arg) {
+define i16 @abs_vgpr_i16(i16 %arg) {
; GFX6-LABEL: abs_vgpr_i16:
; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v1
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: ; return to shader part epilog
+; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_vgpr_i16:
; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u16_e32 v1, 0, v0
; GFX8-NEXT: v_max_i16_e32 v0, v0, v1
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: ; return to shader part epilog
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_vgpr_i16:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_sub_nc_u16 v1, 0, v0
; GFX10-NEXT: v_max_i16 v0, v0, v1
-; GFX10-NEXT: v_readfirstlane_b32 s0, v0
-; GFX10-NEXT: ; return to shader part epilog
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: abs_vgpr_i16:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_sub_nc_u16 v1, 0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_max_i16 v0, v0, v1
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
ret i16 %res
}
-define amdgpu_cs i32 @abs_vgpr_i32(i32 %arg) {
+define i32 @abs_vgpr_i32(i32 %arg) {
; GFX6-LABEL: abs_vgpr_i32:
; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v1
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: ; return to shader part epilog
+; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_vgpr_i32:
; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 0, v0
; GFX8-NEXT: v_max_i32_e32 v0, v0, v1
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: ; return to shader part epilog
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_vgpr_i32:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0
; GFX10-NEXT: v_max_i32_e32 v0, v0, v1
-; GFX10-NEXT: v_readfirstlane_b32 s0, v0
-; GFX10-NEXT: ; return to shader part epilog
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: abs_vgpr_i32:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_sub_nc_u32_e32 v1, 0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_max_i32_e32 v0, v0, v1
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
ret i32 %res
}
-define amdgpu_cs i64 @abs_vgpr_i64(i64 %arg) {
+define i64 @abs_vgpr_i64(i64 %arg) {
; GFX6-LABEL: abs_vgpr_i64:
; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
; GFX6-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX6-NEXT: v_xor_b32_e32 v1, v1, v2
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: v_readfirstlane_b32 s1, v1
-; GFX6-NEXT: ; return to shader part epilog
+; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_vgpr_i64:
; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX8-NEXT: v_xor_b32_e32 v1, v1, v2
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: v_readfirstlane_b32 s1, v1
-; GFX8-NEXT: ; return to shader part epilog
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_vgpr_i64:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX10-NEXT: v_xor_b32_e32 v1, v1, v2
-; GFX10-NEXT: v_readfirstlane_b32 s0, v0
-; GFX10-NEXT: v_readfirstlane_b32 s1, v1
-; GFX10-NEXT: ; return to shader part epilog
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: abs_vgpr_i64:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v3, v2
@@ -214,17 +215,15 @@ define amdgpu_cs i64 @abs_vgpr_i64(i64 %arg) {
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX1250-NEXT: v_xor_b32_e32 v1, v1, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: v_readfirstlane_b32 s1, v1
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%res = call i64 @llvm.abs.i64(i64 %arg, i1 false)
ret i64 %res
}
-define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
+define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
; GFX6-LABEL: abs_vgpr_v4i32:
; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v4
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
@@ -233,14 +232,11 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
; GFX6-NEXT: v_max_i32_e32 v2, v2, v4
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v3
; GFX6-NEXT: v_max_i32_e32 v3, v3, v4
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: v_readfirstlane_b32 s1, v1
-; GFX6-NEXT: v_readfirstlane_b32 s2, v2
-; GFX6-NEXT: v_readfirstlane_b32 s3, v3
-; GFX6-NEXT: ; return to shader part epilog
+; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_vgpr_v4i32:
; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0, v0
; GFX8-NEXT: v_max_i32_e32 v0, v0, v4
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0, v1
@@ -249,14 +245,11 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
; GFX8-NEXT: v_max_i32_e32 v2, v2, v4
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0, v3
; GFX8-NEXT: v_max_i32_e32 v3, v3, v4
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: v_readfirstlane_b32 s1, v1
-; GFX8-NEXT: v_readfirstlane_b32 s2, v2
-; GFX8-NEXT: v_readfirstlane_b32 s3, v3
-; GFX8-NEXT: ; return to shader part epilog
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_vgpr_v4i32:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_sub_nc_u32_e32 v4, 0, v0
; GFX10-NEXT: v_sub_nc_u32_e32 v5, 0, v1
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 0, v2
@@ -265,14 +258,12 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
; GFX10-NEXT: v_max_i32_e32 v1, v1, v5
; GFX10-NEXT: v_max_i32_e32 v2, v2, v6
; GFX10-NEXT: v_max_i32_e32 v3, v3, v7
-; GFX10-NEXT: v_readfirstlane_b32 s0, v0
-; GFX10-NEXT: v_readfirstlane_b32 s1, v1
-; GFX10-NEXT: v_readfirstlane_b32 s2, v2
-; GFX10-NEXT: v_readfirstlane_b32 s3, v3
-; GFX10-NEXT: ; return to shader part epilog
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: abs_vgpr_v4i32:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_sub_nc_u32 v4, 0, v0 :: v_dual_sub_nc_u32 v5, 0, v1
; GFX1250-NEXT: v_dual_sub_nc_u32 v6, 0, v2 :: v_dual_sub_nc_u32 v7, 0, v3
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
@@ -281,13 +272,7 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_max_i32_e32 v2, v2, v6
; GFX1250-NEXT: v_max_i32_e32 v3, v3, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: v_readfirstlane_b32 s1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_readfirstlane_b32 s2, v2
-; GFX1250-NEXT: v_readfirstlane_b32 s3, v3
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
ret <4 x i32> %res
}
@@ -304,44 +289,43 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
ret <2 x i8> %res
}
-define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
+define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
; GFX6-LABEL: abs_vgpr_v2i8:
; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v0
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX6-NEXT: v_max_i32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v1
; GFX6-NEXT: v_max_i32_e32 v1, v1, v2
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: v_readfirstlane_b32 s1, v1
-; GFX6-NEXT: ; return to shader part epilog
+; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_vgpr_v2i8:
; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: v_readfirstlane_b32 s1, v1
-; GFX8-NEXT: ; return to shader part epilog
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_vgpr_v2i8:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-NEXT: v_sub_nc_u16 v2, 0, v0
; GFX10-NEXT: v_sub_nc_u16 v3, 0, v1
; GFX10-NEXT: v_max_i16 v0, v0, v2
; GFX10-NEXT: v_max_i16 v1, v1, v3
-; GFX10-NEXT: v_readfirstlane_b32 s0, v0
-; GFX10-NEXT: v_readfirstlane_b32 s1, v1
-; GFX10-NEXT: ; return to shader part epilog
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: abs_vgpr_v2i8:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -350,10 +334,7 @@ define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_max_i16 v0, v0, v2
; GFX1250-NEXT: v_max_i16 v1, v1, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: v_readfirstlane_b32 s1, v1
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
ret <2 x i8> %res
}
@@ -372,9 +353,10 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
ret <3 x i8> %res
}
-define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) {
+define <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) {
; GFX6-LABEL: abs_vgpr_v3i8:
; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8
@@ -384,13 +366,11 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) {
; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v2
; GFX6-NEXT: v_max_i32_e32 v2, v2, v3
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: v_readfirstlane_b32 s1, v1
-; GFX6-NEXT: v_readfirstlane_b32 s2, v2
-; GFX6-NEXT: ; return to shader part epilog
+; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_vgpr_v3i8:
; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
@@ -398,13 +378,11 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) {
; GFX8-NEXT: v_sub_u16_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v2, sext(v2), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: v_readfirstlane_b32 s1, v1
-; GFX8-NEXT: v_readfirstlane_b32 s2, v2
-; GFX8-NEXT: ; return to shader part epilog
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_vgpr_v3i8:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-NEXT: v_bfe_i32 v2, v2, 0, 8
@@ -414,13 +392,12 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) {
; GFX10-NEXT: v_max_i16 v0, v0, v3
; GFX10-NEXT: v_max_i16 v1, v1, v4
; GFX10-NEXT: v_max_i16 v2, v2, v5
-; GFX10-NEXT: v_readfirstlane_b32 s0, v0
-; GFX10-NEXT: v_readfirstlane_b32 s1, v1
-; GFX10-NEXT: v_readfirstlane_b32 s2, v2
-; GFX10-NEXT: ; return to shader part epilog
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: abs_vgpr_v3i8:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX1250-NEXT: v_bfe_i32 v2, v2, 0, 8
@@ -433,12 +410,7 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) {
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_max_i16 v1, v1, v4
; GFX1250-NEXT: v_max_i16 v2, v2, v5
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: v_readfirstlane_b32 s1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_readfirstlane_b32 s2, v2
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
ret <3 x i8> %res
}
@@ -485,44 +457,44 @@ define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
ret <2 x i16> %res
}
-define amdgpu_cs <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
+define <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
; GFX6-LABEL: abs_vgpr_v2i16:
; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v0
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_max_i32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v1
; GFX6-NEXT: v_max_i32_e32 v1, v1, v2
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: v_readfirstlane_b32 s1, v1
-; GFX6-NEXT: ; return to shader part epilog
+; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_vgpr_v2i16:
; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: v_sub_u16_e32 v1, 0, v0
; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v1, v0, v1
; GFX8-NEXT: v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: ; return to shader part epilog
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_vgpr_v2i16:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v1, 0, v0
; GFX10-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX10-NEXT: v_readfirstlane_b32 s0, v0
-; GFX10-NEXT: ; return to shader part epilog
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: abs_vgpr_v2i16:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_sub_i16 v1, 0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
ret <2 x i16> %res
}
@@ -576,9 +548,10 @@ define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
ret <3 x i16> %res
}
-define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
+define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
; GFX6-LABEL: abs_vgpr_v3i16:
; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
@@ -588,13 +561,11 @@ define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v2
; GFX6-NEXT: v_max_i32_e32 v2, v2, v3
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: v_readfirstlane_b32 s1, v1
-; GFX6-NEXT: v_readfirstlane_b32 s2, v2
-; GFX6-NEXT: ; return to shader part epilog
+; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_vgpr_v3i16:
; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0
; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
@@ -603,31 +574,27 @@ define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
; GFX8-NEXT: v_max_i16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: v_max_i16_e32 v1, v1, v4
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: v_readfirstlane_b32 s1, v1
-; GFX8-NEXT: ; return to shader part epilog
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_vgpr_v3i16:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX10-NEXT: v_sub_nc_u16 v3, 0, v1
; GFX10-NEXT: v_pk_max_i16 v0, v0, v2
; GFX10-NEXT: v_max_i16 v1, v1, v3
-; GFX10-NEXT: v_readfirstlane_b32 s0, v0
-; GFX10-NEXT: v_readfirstlane_b32 s1, v1
-; GFX10-NEXT: ; return to shader part epilog
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: abs_vgpr_v3i16:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX1250-NEXT: v_sub_nc_u16 v3, 0, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_pk_max_i16 v0, v0, v2
; GFX1250-NEXT: v_max_i16 v1, v1, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: v_readfirstlane_b32 s1, v1
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
ret <3 x i16> %res
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
index 7714c03..d3e2118 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
@@ -113,9 +113,9 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
-; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -161,16 +161,17 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_2: ; %true
+; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB10_3
+; CHECK-NEXT: .LBB10_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB10_3
; CHECK-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -208,11 +209,7 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
-; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: s_cmp_ge_u32 s0, 12
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -258,17 +255,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
-; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB14_3
-; CHECK-NEXT: .LBB14_2: ; %true
+; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB14_3
+; CHECK-NEXT: .LBB14_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB14_3
; CHECK-NEXT: .LBB14_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -310,14 +303,12 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cmp_ge_u32 s0, 12
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-NEXT: s_cmp_le_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
-; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
-; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -372,16 +363,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_2: ; %true
+; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB18_3
+; CHECK-NEXT: .LBB18_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB18_3
; CHECK-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
index 7b81669..250fbc7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
@@ -116,9 +116,9 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -164,16 +164,17 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_2: ; %true
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB10_3
+; CHECK-NEXT: .LBB10_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB10_3
; CHECK-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -211,11 +212,7 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
+; CHECK-NEXT: s_cmp_ge_u32 s0, 12
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -261,17 +258,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB14_3
-; CHECK-NEXT: .LBB14_2: ; %true
+; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB14_3
+; CHECK-NEXT: .LBB14_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB14_3
; CHECK-NEXT: .LBB14_3:
%c = icmp ult i32 %v, 12
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -313,14 +306,12 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cmp_ge_u32 s0, 12
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-NEXT: s_cmp_le_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
-; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
+; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -375,16 +366,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_2: ; %true
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB18_3
+; CHECK-NEXT: .LBB18_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB18_3
; CHECK-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir
new file mode 100644
index 0000000..097372a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir
@@ -0,0 +1,524 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
+---
+name: add_s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: add_s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: add_s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: add_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s32_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: add_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: add_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s64) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[ADD]], [[ADD]]
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_CONSTANT i64 255
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: add_s64_sv
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $vgpr0_vgpr1
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: add_s64_vs
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: add_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: uaddo_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: uaddo_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[UADDO1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[SELECT]], [[UADDO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %4, %2
+...
+
+---
+name: uaddo_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1
+ ; CHECK-LABEL: name: uaddo_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uaddo_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1
+ ; CHECK-LABEL: name: uaddo_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uaddo_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: uaddo_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uadde_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: uadde_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_ss_scc_use
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_ss_scc_use
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %8:_(s32) = G_AND %4, %6
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
index 54ee69f..30c958f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
---
name: add_s16_ss
legalized: true
@@ -19,13 +18,13 @@ body: |
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]]
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32)
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -44,13 +43,13 @@ body: |
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -69,13 +68,13 @@ body: |
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -93,11 +92,11 @@ body: |
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
index 97018fa..01eb391 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
---
name: add_v2s16_ss
@@ -18,16 +17,19 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]]
; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[LSHR]], [[LSHR1]]
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32)
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR]]
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %3, %3
+ %5:_(<2 x s16>) = G_AND %2, %4
...
---
@@ -44,11 +46,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $vgpr0
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(<2 x s16>) = G_AND %2, %2
...
---
@@ -65,9 +67,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $sgpr0
%2:_(<2 x s16>) = G_ADD %0, %1
+ %3:_(<2 x s16>) = G_AND %2, %2
...
---
@@ -83,9 +87,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(<2 x s16>) = G_AND %2, %2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
index 7378c93..e0e783e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
@@ -77,10 +77,14 @@ body: |
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
%3:_(s16) = G_SEXT %2
+ %4:_(s16) = G_CONSTANT i16 255
+ %5:_(s16) = G_AND %3, %4
...
---
@@ -215,9 +219,13 @@ body: |
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]]
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_SEXT %1
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(s16) = G_AND %2, %3
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
index b0199d3..e3c01c0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
@@ -1,5 +1,107 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: sub_s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: sub_s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SUB]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: sub_s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
---
name: sub_s32_ss
@@ -14,9 +116,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -33,9 +137,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -52,9 +158,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -70,7 +178,376 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: sub_v2s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[BITCAST]], [[BITCAST1]]
+ ; CHECK-NEXT: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[LSHR]], [[LSHR1]]
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB]](s32), [[SUB1]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC]]
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_v2s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $vgpr0
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_v2s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $sgpr0
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: sub_v2s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr0_sgpr1
+ ; CHECK-LABEL: name: sub_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s64) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[SUB]], [[SUB]]
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: sub_s64_sv
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $vgpr0_vgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: sub_s64_vs
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: sub_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: usubo_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1
+ ; CHECK-LABEL: name: usubo_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1
+ ; CHECK-LABEL: name: usubo_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: usubo_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usube_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: usube_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[ICMP]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
index 088c20a3..d4baa5f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
@@ -73,10 +73,14 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
%3:_(s16) = G_ZEXT %2
+ %4:_(s16) = G_CONSTANT i16 255
+ %5:_(s16) = G_AND %3, %4
...
---
@@ -209,9 +213,13 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]]
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_ZEXT %1
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(s16) = G_AND %2, %3
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll
new file mode 100644
index 0000000..8b5958d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll
@@ -0,0 +1,535 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
+
+define i16 @s_sub_i16(i16 inreg %a, i16 inreg %b) {
+; GFX7-LABEL: s_sub_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_i32 s4, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_i32 s4, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_i32 s4, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_i32 s4, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @v_sub_i16(i16 %a, i16 %b) {
+; GFX7-LABEL: v_sub_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u16 v0.l, v0.l, v1.l
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i16 %a, %b
+ ret i16 %c
+}
+
+define i32 @s_sub_i32(i32 inreg %a, i32 inreg %b) {
+; GFX7-LABEL: s_sub_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_i32 s4, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_i32 s4, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_i32 s4, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_i32 s4, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @v_sub_i32(i32 %a, i32 %b) {
+; GFX7-LABEL: v_sub_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+; TODO: Add a test for s_sub_v2i16. The instruction selector currently fails
+; to handle G_UNMERGE_VALUES.
+
+define <2 x i16> @v_sub_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; GFX7-LABEL: v_sub_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u16_e32 v2, v0, v1
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define i64 @s_sub_i64(i64 inreg %a, i64 inreg %b) {
+; GFX7-LABEL: s_sub_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_u32 s4, s16, s18
+; GFX7-NEXT: s_subb_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_u32 s4, s16, s18
+; GFX9-NEXT: s_subb_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_u32 s4, s16, s18
+; GFX8-NEXT: s_subb_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s4, s16, s18
+; GFX10-NEXT: s_subb_u32 s5, s17, s19
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s0, s0, s2
+; GFX11-NEXT: s_subb_u32 s1, s1, s3
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i64 %a, %b
+ ret i64 %c
+}
+
+define i64 @v_sub_i64(i64 %a, i64 %b) {
+; GFX7-LABEL: v_sub_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i64 %a, %b
+ ret i64 %c
+}
+
+define void @s_usubo_usube(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: s_usubo_usube:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_u32 s4, s16, s18
+; GFX7-NEXT: s_subb_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v4, s4
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_cselect_b32 s8, 1, 0
+; GFX7-NEXT: v_mov_b32_e32 v5, s5
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX7-NEXT: v_mov_b32_e32 v0, s8
+; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_usubo_usube:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_u32 s4, s16, s18
+; GFX9-NEXT: s_subb_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX9-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_usubo_usube:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_u32 s4, s16, s18
+; GFX8-NEXT: s_subb_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v4, s4
+; GFX8-NEXT: s_cselect_b32 s6, 1, 0
+; GFX8-NEXT: v_mov_b32_e32 v5, s5
+; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v0, s6
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_usubo_usube:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s4, s16, s18
+; GFX10-NEXT: s_subb_u32 s5, s17, s19
+; GFX10-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: v_mov_b32_e32 v5, s5
+; GFX10-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX10-NEXT: global_store_dword v[2:3], v6, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_usubo_usube:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s0, s0, s2
+; GFX11-NEXT: s_subb_u32 s1, s1, s3
+; GFX11-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX11-NEXT: v_mov_b32_e32 v6, s2
+; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX11-NEXT: global_store_b32 v[2:3], v6, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_usubo_usube:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_u32 s0, s0, s2
+; GFX12-NEXT: s_sub_co_ci_u32 s1, s1, s3
+; GFX12-NEXT: s_cselect_b32 s2, 1, 0
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX12-NEXT: v_mov_b32_e32 v6, s2
+; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX12-NEXT: global_store_b32 v[2:3], v6, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %sub = extractvalue {i64, i1} %usubo, 0
+ %of = extractvalue {i64, i1} %usubo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %sub, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+define void @v_usubo_usube(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: v_usubo_usube:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
+; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_usubo_usube:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX9-NEXT: global_store_dword v[6:7], v2, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_usubo_usube:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
+; GFX8-NEXT: flat_store_dword v[6:7], v2
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_usubo_usube:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX10-NEXT: global_store_dword v[6:7], v2, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_usubo_usube:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT: global_store_b32 v[6:7], v2, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_usubo_usube:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: global_store_b32 v[6:7], v2, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %sub = extractvalue {i64, i1} %usubo, 0
+ %of = extractvalue {i64, i1} %usubo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %sub, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-miscellaneous-uniform-intrinsic.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-miscellaneous-uniform-intrinsic.ll
new file mode 100644
index 0000000..34d4c51
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-miscellaneous-uniform-intrinsic.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -o - %s | FileCheck %s
+define amdgpu_kernel void @readfirstlane_with_readfirstlane(ptr addrspace(1) %out) {
+; CHECK-LABEL: readfirstlane_with_readfirstlane:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-NEXT: s_endpgm
+ %v1 = call i32 @llvm.amdgcn.readfirstlane(i32 5)
+ %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1)
+ store i32 %v2, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @readfirstlane_with_readlane(ptr addrspace(1) %out) {
+; CHECK-LABEL: readfirstlane_with_readlane:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: v_bfe_u32 v1, v0, 10, 10
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; CHECK-NEXT: v_readfirstlane_b32 s2, v1
+; CHECK-NEXT: v_readlane_b32 s2, v0, s2
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-NEXT: s_endpgm
+ %tidx = call i32 @llvm.amdgcn.workitem.id.x()
+ %tidy = call i32 @llvm.amdgcn.workitem.id.y()
+ %v1 = call i32 @llvm.amdgcn.readlane(i32 %tidx, i32 %tidy)
+ %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1)
+ store i32 %v2, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @readlane_with_firstlane(ptr addrspace(1) %out) {
+; CHECK-LABEL: readlane_with_firstlane:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; CHECK-NEXT: v_readfirstlane_b32 s2, v0
+; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-NEXT: s_endpgm
+ %tidx = call i32 @llvm.amdgcn.workitem.id.x()
+ %v1 = call i32 @llvm.amdgcn.readfirstlane(i32 %tidx)
+ %v2 = call i32 @llvm.amdgcn.readlane(i32 %v1, i32 3)
+ store i32 %v2, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @readlane_readlane(ptr addrspace(1) %out) {
+; CHECK-LABEL: readlane_readlane:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: v_bfe_u32 v1, v0, 10, 10
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; CHECK-NEXT: v_readfirstlane_b32 s2, v1
+; CHECK-NEXT: v_readlane_b32 s2, v0, s2
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-NEXT: s_endpgm
+ %tidx = call i32 @llvm.amdgcn.workitem.id.x()
+ %tidy = call i32 @llvm.amdgcn.workitem.id.y()
+ %v1 = call i32 @llvm.amdgcn.readlane(i32 %tidx, i32 %tidy)
+ %v2 = call i32 @llvm.amdgcn.readlane(i32 %v1, i32 2)
+ store i32 %v2, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) {
+; CHECK-LABEL: permlane64_uniform:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_clause 0x1
+; CHECK-NEXT: s_load_b32 s2, s[4:5], 0x8
+; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane64(i32 %src)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @permlane64_nonuniform(i32 addrspace(1)* %out) {
+; CHECK-LABEL: permlane64_nonuniform:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_permlane64_b32 v1, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-NEXT: s_endpgm
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %v = call i32 @llvm.amdgcn.permlane64(i32 %tid)
+ %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ store i32 %v, i32 addrspace(1)* %out_ptr
+ ret void
+}
+
+define amdgpu_kernel void @permlane64_nonuniform_expression(i32 addrspace(1)* %out) {
+; CHECK-LABEL: permlane64_nonuniform_expression:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; CHECK-NEXT: v_add_nc_u32_e32 v1, 1, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; CHECK-NEXT: v_permlane64_b32 v1, v1
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-NEXT: s_endpgm
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid2 = add i32 %tid, 1
+ %v = call i32 @llvm.amdgcn.permlane64(i32 %tid2)
+ %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ store i32 %v, i32 addrspace(1)* %out_ptr
+ ret void
+}
+
+define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) {
+; CHECK-LABEL: trivial_waterfall_eq_zero:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5
+; CHECK-NEXT: s_mov_b32 s2, 0
+; CHECK-NEXT: s_branch .LBB7_2
+; CHECK-NEXT: .LBB7_1: ; %Flow
+; CHECK-NEXT: ; in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s2
+; CHECK-NEXT: s_mov_b32 s2, -1
+; CHECK-NEXT: s_cbranch_vccz .LBB7_4
+; CHECK-NEXT: .LBB7_2: ; %while
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s2
+; CHECK-NEXT: s_mov_b32 s2, -1
+; CHECK-NEXT: s_cbranch_vccnz .LBB7_1
+; CHECK-NEXT: ; %bb.3: ; %if
+; CHECK-NEXT: ; in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT: s_mov_b32 s2, 0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-NEXT: s_branch .LBB7_1
+; CHECK-NEXT: .LBB7_4: ; %exit
+; CHECK-NEXT: s_endpgm
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %not_done = xor i1 %done, true
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
+ %is_done = icmp eq i64 %ballot, 0 ; the ballot of a uniform condition is zero iff the condition is false, so is_done = !not_done
+ br i1 %is_done, label %exit, label %if
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll
index 33ce278..c962c05 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O3 -S < %s | FileCheck %s -check-prefix=O3-CHECK
define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll
index a3e42e5..a7e828c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,dce -S < %s | FileCheck %s -check-prefix=DCE-CHECK
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll
index 2fde3e3..7929261 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,instcombine,early-cse,simplifycfg -S < %s | FileCheck %s -check-prefix=COMB-CHECK
; This should not be optimized
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
index db32135..b8f084d 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
@@ -4,24 +4,14 @@
define amdgpu_gs i32 @main() {
; CHECK-LABEL: main:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_bitcmp1_b32 0, 0
; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-NEXT: s_or_saveexec_b32 s2, -1
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
-; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_readfirstlane_b32 s1, v0
-; CHECK-NEXT: s_mov_b32 exec_lo, s2
-; CHECK-NEXT: s_or_b32 s0, s0, s1
-; CHECK-NEXT: s_wait_alu 0xfffe
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_xor_b32 s0, s0, -1
-; CHECK-NEXT: s_wait_alu 0xfffe
-; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
-; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_readfirstlane_b32 s0, v1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_wait_alu 0xf1ff
; CHECK-NEXT: ; return to shader part epilog
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 3aa3663..704ea37 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -9,11 +9,11 @@
; RUN: | FileCheck -check-prefix=GCN-O3 %s
-; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O0>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
+; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O0>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
-; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O2>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
+; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O2>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
-; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O3>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
+; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp<O3>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
define void @empty() {
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 6e52125..ee6caab 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -31,6 +31,11 @@
; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O0-NEXT: AMDGPU Printf lowering
; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O0-NEXT: FunctionPass Manager
+; GCN-O0-NEXT: Dominator Tree Construction
+; GCN-O0-NEXT: Cycle Info Analysis
+; GCN-O0-NEXT: Uniformity Analysis
+; GCN-O0-NEXT: AMDGPU Uniform Intrinsic Combine
; GCN-O0-NEXT: Expand variadic functions
; GCN-O0-NEXT: AMDGPU Inline All Functions
; GCN-O0-NEXT: Inliner for always_inline functions
@@ -179,6 +184,11 @@
; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O1-NEXT: AMDGPU Printf lowering
; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O1-NEXT: FunctionPass Manager
+; GCN-O1-NEXT: Dominator Tree Construction
+; GCN-O1-NEXT: Cycle Info Analysis
+; GCN-O1-NEXT: Uniformity Analysis
+; GCN-O1-NEXT: AMDGPU Uniform Intrinsic Combine
; GCN-O1-NEXT: Expand variadic functions
; GCN-O1-NEXT: AMDGPU Inline All Functions
; GCN-O1-NEXT: Inliner for always_inline functions
@@ -466,6 +476,11 @@
; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering
; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O1-OPTS-NEXT: FunctionPass Manager
+; GCN-O1-OPTS-NEXT: Dominator Tree Construction
+; GCN-O1-OPTS-NEXT: Cycle Info Analysis
+; GCN-O1-OPTS-NEXT: Uniformity Analysis
+; GCN-O1-OPTS-NEXT: AMDGPU Uniform Intrinsic Combine
; GCN-O1-OPTS-NEXT: Expand variadic functions
; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions
; GCN-O1-OPTS-NEXT: Inliner for always_inline functions
@@ -783,6 +798,10 @@
; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: AMDGPU Image Intrinsic Optimizer
+; GCN-O2-NEXT: Dominator Tree Construction
+; GCN-O2-NEXT: Cycle Info Analysis
+; GCN-O2-NEXT: Uniformity Analysis
+; GCN-O2-NEXT: AMDGPU Uniform Intrinsic Combine
; GCN-O2-NEXT: Expand variadic functions
; GCN-O2-NEXT: AMDGPU Inline All Functions
; GCN-O2-NEXT: Inliner for always_inline functions
@@ -1104,6 +1123,10 @@
; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: AMDGPU Image Intrinsic Optimizer
+; GCN-O3-NEXT: Dominator Tree Construction
+; GCN-O3-NEXT: Cycle Info Analysis
+; GCN-O3-NEXT: Uniformity Analysis
+; GCN-O3-NEXT: AMDGPU Uniform Intrinsic Combine
; GCN-O3-NEXT: Expand variadic functions
; GCN-O3-NEXT: AMDGPU Inline All Functions
; GCN-O3-NEXT: Inliner for always_inline functions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index e00e1f1..c1f3a12 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -110,9 +110,8 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, s0, 1
-; CHECK-NEXT: v_cmp_ne_u32_e64 vcc_lo, s0, 0
-; CHECK-NEXT: s_cbranch_vccz .LBB8_2
+; CHECK-NEXT: s_bitcmp0_b32 s0, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB8_3
@@ -156,15 +155,16 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, s0, 1
-; CHECK-NEXT: v_cmp_ne_u32_e64 vcc_lo, s0, 0
-; CHECK-NEXT: s_cbranch_vccz .LBB10_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_2: ; %true
+; CHECK-NEXT: s_bitcmp1_b32 s0, 0
+; CHECK-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT: s_cbranch_vccnz .LBB10_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB10_3
+; CHECK-NEXT: .LBB10_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB10_3
; CHECK-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -201,8 +201,8 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_lt_u32_e64 vcc_lo, s0, 12
-; CHECK-NEXT: s_cbranch_vccz .LBB12_2
+; CHECK-NEXT: s_cmp_gt_u32 s0, 11
+; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB12_3
@@ -245,14 +245,14 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_lt_u32_e64 vcc_lo, s0, 12
-; CHECK-NEXT: s_cbranch_vccz .LBB14_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB14_3
-; CHECK-NEXT: .LBB14_2: ; %true
+; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB14_3
+; CHECK-NEXT: .LBB14_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB14_3
; CHECK-NEXT: .LBB14_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -293,13 +293,13 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cmp_gt_u32 s0, 11
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-NEXT: s_cmp_lt_u32 s1, 35
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
-; CHECK-NEXT: s_cbranch_scc0 .LBB16_2
+; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT: s_cbranch_vccnz .LBB16_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB16_3
@@ -353,14 +353,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
-; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_2: ; %true
+; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT: s_cbranch_vccnz .LBB18_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB18_3
+; CHECK-NEXT: .LBB18_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB18_3
; CHECK-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
@@ -591,3 +591,24 @@ exit:
store i32 %ballot, ptr addrspace(1) %out
ret void
}
+
+define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) {
+; GFX10-LABEL: compare_bfloats:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: compare_bfloats:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX11-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX11-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX11-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX11-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
+; GFX11-NEXT: ; return to shader part epilog
+ %cmp = fcmp ogt bfloat %x, %y
+ %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
+ ret i32 %ballot
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
index b4adf7f..827a01f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
@@ -113,9 +113,8 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, s0, 1
-; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0
-; CHECK-NEXT: s_cbranch_vccz .LBB8_2
+; CHECK-NEXT: s_bitcmp0_b32 s0, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB8_3
@@ -159,15 +158,16 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, s0, 1
-; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0
-; CHECK-NEXT: s_cbranch_vccz .LBB10_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_2: ; %true
+; CHECK-NEXT: s_bitcmp1_b32 s0, 0
+; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
+; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
+; CHECK-NEXT: s_cbranch_vccnz .LBB10_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB10_3
+; CHECK-NEXT: .LBB10_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB10_3
; CHECK-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -204,8 +204,8 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12
-; CHECK-NEXT: s_cbranch_vccz .LBB12_2
+; CHECK-NEXT: s_cmp_gt_u32 s0, 11
+; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB12_3
@@ -248,14 +248,14 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12
-; CHECK-NEXT: s_cbranch_vccz .LBB14_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB14_3
-; CHECK-NEXT: .LBB14_2: ; %true
+; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB14_3
+; CHECK-NEXT: .LBB14_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB14_3
; CHECK-NEXT: .LBB14_3:
%c = icmp ult i32 %v, 12
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -296,13 +296,13 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cmp_gt_u32 s0, 11
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
-; CHECK-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-NEXT: s_cmp_lt_u32 s1, 35
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
-; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
-; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
-; CHECK-NEXT: s_cbranch_scc0 .LBB16_2
+; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
+; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
+; CHECK-NEXT: s_cbranch_vccnz .LBB16_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB16_3
@@ -356,14 +356,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
-; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
-; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_2: ; %true
+; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
+; CHECK-NEXT: s_cbranch_vccnz .LBB18_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB18_3
+; CHECK-NEXT: .LBB18_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB18_3
; CHECK-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
@@ -557,3 +557,15 @@ exit:
store i64 %ballot, ptr addrspace(1) %out
ret void
}
+
+define amdgpu_cs i64 @compare_bfloats(bfloat %x, bfloat %y) {
+; CHECK-LABEL: compare_bfloats:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1
+; CHECK-NEXT: ; return to shader part epilog
+ %cmp = fcmp ogt bfloat %x, %y
+ %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+ ret i64 %ballot
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
index 6dd2258..39191d2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
@@ -23,10 +23,8 @@ define amdgpu_kernel void @test_s_i32(ptr addrspace(1) %out, i32 %src0) {
; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0
-; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_s_i32:
@@ -36,8 +34,6 @@ define amdgpu_kernel void @test_s_i32(ptr addrspace(1) %out, i32 %src0) {
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane64.i32(i32 %src0)
@@ -50,12 +46,9 @@ define amdgpu_kernel void @test_s_i64(ptr addrspace(1) %out, i64 %src0) {
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s3
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v0
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v2
-; GFX11-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_s_i64:
@@ -64,9 +57,6 @@ define amdgpu_kernel void @test_s_i64(ptr addrspace(1) %out, i64 %src0) {
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0
-; GFX11-GISEL-NEXT: v_permlane64_b32 v1, v1
; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
%v = call i64 @llvm.amdgcn.permlane64.i64(i64 %src0)
@@ -79,12 +69,9 @@ define amdgpu_kernel void @test_s_f64(ptr addrspace(1) %out, double %src0) {
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s3
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v0
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v2
-; GFX11-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_s_f64:
@@ -93,9 +80,6 @@ define amdgpu_kernel void @test_s_f64(ptr addrspace(1) %out, double %src0) {
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0
-; GFX11-GISEL-NEXT: v_permlane64_b32 v1, v1
; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
%v = call double @llvm.amdgcn.permlane64.f64(double %src0)
@@ -116,19 +100,15 @@ define amdgpu_kernel void @test_i_i32(ptr addrspace(1) %out) {
; GFX11-SDAG-LABEL: test_i_i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0x63 :: v_dual_mov_b32 v1, 0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x63
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_i_i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, 0x63 :: v_dual_mov_b32 v1, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
@@ -141,19 +121,15 @@ define amdgpu_kernel void @test_i_f32(ptr addrspace(1) %out) {
; GFX11-SDAG-LABEL: test_i_f32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0x449a5000 :: v_dual_mov_b32 v1, 0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x449a5000
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_i_f32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, 0x449a5000 :: v_dual_mov_b32 v1, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
@@ -166,23 +142,16 @@ define amdgpu_kernel void @test_i_i64(ptr addrspace(1) %out) {
; GFX11-SDAG-LABEL: test_i_i64:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0x63
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v2
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x63
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT: global_store_b64 v1, v[0:1], s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_i_i64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x63
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0
-; GFX11-GISEL-NEXT: v_permlane64_b32 v1, v2
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
@@ -195,22 +164,16 @@ define amdgpu_kernel void @test_i_f64(ptr addrspace(1) %out) {
; GFX11-SDAG-LABEL: test_i_f64:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0x40934a00
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v0
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v2
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40934a00
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT: global_store_b64 v0, v[0:1], s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_i_f64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x40934a00
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v2
-; GFX11-GISEL-NEXT: v_permlane64_b32 v1, v1
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0x40934a00 :: v_dual_mov_b32 v2, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll
index b0149f7..672b658 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll
@@ -6,12 +6,9 @@ define amdgpu_kernel void @test_p0(ptr addrspace(1) %out, ptr %src0) {
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s3
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v0
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v2
-; GFX11-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
%v = call ptr @llvm.amdgcn.permlane64.p0(ptr %src0)
store ptr %v, ptr addrspace(1) %out
@@ -22,21 +19,14 @@ define amdgpu_kernel void @test_v3p0(ptr addrspace(1) %out, <3 x ptr> %src0) {
; GFX11-SDAG-LABEL: test_v3p0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_clause 0x2
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x44
; GFX11-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x54
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x44
; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v1, s2
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v5, s7
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v8, s6
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v7, s0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v1
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v4
-; GFX11-SDAG-NEXT: v_permlane64_b32 v5, v5
-; GFX11-SDAG-NEXT: v_permlane64_b32 v4, v8
-; GFX11-SDAG-NEXT: v_permlane64_b32 v3, v0
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v7
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v5, s7
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v1, s1
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s2
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: global_store_b64 v6, v[4:5], s[4:5] offset:16
; GFX11-SDAG-NEXT: global_store_b128 v6, v[0:3], s[4:5]
@@ -53,10 +43,8 @@ define amdgpu_kernel void @test_p3(ptr addrspace(1) %out, ptr addrspace(3) %src0
; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0
-; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
%v = call ptr addrspace(3) @llvm.amdgcn.permlane64.v3p0(ptr addrspace(3) %src0)
store ptr addrspace(3) %v, ptr addrspace(1) %out
@@ -70,14 +58,9 @@ define amdgpu_kernel void @test_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v3, s0
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v0
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v3
-; GFX11-SDAG-NEXT: global_store_b96 v4, v[0:2], s[4:5]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s0
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX11-SDAG-NEXT: global_store_b96 v3, v[0:2], s[4:5]
; GFX11-SDAG-NEXT: s_endpgm
%v = call <3 x ptr addrspace(3)> @llvm.amdgcn.permlane64.v3p3(<3 x ptr addrspace(3)> %src0)
store <3 x ptr addrspace(3)> %v, ptr addrspace(1) %out
@@ -91,10 +74,8 @@ define amdgpu_kernel void @test_p5(ptr addrspace(1) %out, ptr addrspace(5) %src0
; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0
-; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
%v = call ptr addrspace(5) @llvm.amdgcn.permlane64.p5(ptr addrspace(5) %src0)
store ptr addrspace(5) %v, ptr addrspace(1) %out
@@ -108,14 +89,9 @@ define amdgpu_kernel void @test_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v3, s0
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v0
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v3
-; GFX11-SDAG-NEXT: global_store_b96 v4, v[0:2], s[4:5]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s0
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX11-SDAG-NEXT: global_store_b96 v3, v[0:2], s[4:5]
; GFX11-SDAG-NEXT: s_endpgm
%v = call <3 x ptr addrspace(5)> @llvm.amdgcn.permlane64.v3p5(<3 x ptr addrspace(5)> %src0)
store <3 x ptr addrspace(5)> %v, ptr addrspace(1) %out
@@ -129,10 +105,8 @@ define amdgpu_kernel void @test_p6(ptr addrspace(1) %out, ptr addrspace(6) %src0
; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0
-; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
%v = call ptr addrspace(6) @llvm.amdgcn.permlane64.p6(ptr addrspace(6) %src0)
store ptr addrspace(6) %v, ptr addrspace(1) %out
@@ -146,14 +120,9 @@ define amdgpu_kernel void @test_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v3, s0
-; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v0
-; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v3
-; GFX11-SDAG-NEXT: global_store_b96 v4, v[0:2], s[4:5]
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s0
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX11-SDAG-NEXT: global_store_b96 v3, v[0:2], s[4:5]
; GFX11-SDAG-NEXT: s_endpgm
%v = call <3 x ptr addrspace(6)> @llvm.amdgcn.permlane64.v3p6(<3 x ptr addrspace(6)> %src0)
store <3 x ptr addrspace(6)> %v, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index d1ba892..02d2990 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -396,8 +396,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: s_mov_b32 s0, 0
-; CHECK-GISEL-NEXT: s_mov_b32 s1, 0x40400000
+; CHECK-GISEL-NEXT: s_mov_b64 s[0:1], 0x4040000000000000
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[0:1]
; CHECK-GISEL-NEXT: ;;#ASMEND
@@ -456,14 +455,13 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32
; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 32
; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
@@ -490,15 +488,13 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17
-; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 0
; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0x40400000
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
@@ -588,17 +584,17 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17
-; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
-; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64:
@@ -628,17 +624,17 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17
-; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
-; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
index 7ff5eb4..0795f40 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
@@ -9,7 +9,7 @@ declare double @llvm.amdgcn.readlane.f64(double, i32) #0
define amdgpu_kernel void @test_readlane_sreg_sreg_i32(i32 %src0, i32 %src1) #1 {
; CHECK-SDAG-LABEL: test_readlane_sreg_sreg_i32:
; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; CHECK-SDAG-NEXT: s_load_dword s0, s[8:9], 0x0
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s0
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test_readlane_sreg_sreg_i32(i32 %src0, i32 %src1) #1
;
; CHECK-GISEL-LABEL: test_readlane_sreg_sreg_i32:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; CHECK-GISEL-NEXT: s_load_dword s0, s[8:9], 0x0
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s0
@@ -224,14 +224,13 @@ define amdgpu_kernel void @test_readlane_imm_sreg_i64(ptr addrspace(1) %out, i32
; CHECK-GISEL-LABEL: test_readlane_imm_sreg_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32
; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 32
; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
@@ -258,15 +257,13 @@ define amdgpu_kernel void @test_readlane_imm_sreg_f64(ptr addrspace(1) %out, i32
; CHECK-GISEL-LABEL: test_readlane_imm_sreg_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17
-; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 0
; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0x40400000
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
@@ -660,17 +657,17 @@ define amdgpu_kernel void @test_readlane_copy_from_sgpr_i64(ptr addrspace(1) %ou
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17
-; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
-; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readlane_copy_from_sgpr_i64:
@@ -700,17 +697,17 @@ define amdgpu_kernel void @test_readlane_copy_from_sgpr_f64(ptr addrspace(1) %ou
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17
-; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
-; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readlane_copy_from_sgpr_f64:
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-to-vector.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-to-vector.ll
new file mode 100644
index 0000000..05a0e39
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-to-vector.ll
@@ -0,0 +1,325 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx1100 -passes=amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
+
+define amdgpu_kernel void @large_array_vectors_small_users(<16 x i8> %in, <16 x i8> %add, ptr addrspace(3) %out) #0 {
+; OPT-LABEL: define amdgpu_kernel void @large_array_vectors_small_users(
+; OPT-SAME: <16 x i8> [[IN:%.*]], <16 x i8> [[ADD:%.*]], ptr addrspace(3) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; OPT-NEXT: [[ENTRY:.*:]]
+; OPT-NEXT: [[ALLOCA:%.*]] = freeze <128 x i8> poison
+; OPT-NEXT: [[TMP0:%.*]] = extractelement <16 x i8> [[IN]], i64 0
+; OPT-NEXT: [[TMP1:%.*]] = insertelement <128 x i8> [[ALLOCA]], i8 [[TMP0]], i32 0
+; OPT-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[IN]], i64 1
+; OPT-NEXT: [[TMP3:%.*]] = insertelement <128 x i8> [[TMP1]], i8 [[TMP2]], i32 1
+; OPT-NEXT: [[TMP4:%.*]] = extractelement <16 x i8> [[IN]], i64 2
+; OPT-NEXT: [[TMP5:%.*]] = insertelement <128 x i8> [[TMP3]], i8 [[TMP4]], i32 2
+; OPT-NEXT: [[TMP6:%.*]] = extractelement <16 x i8> [[IN]], i64 3
+; OPT-NEXT: [[TMP7:%.*]] = insertelement <128 x i8> [[TMP5]], i8 [[TMP6]], i32 3
+; OPT-NEXT: [[TMP8:%.*]] = extractelement <16 x i8> [[IN]], i64 4
+; OPT-NEXT: [[TMP9:%.*]] = insertelement <128 x i8> [[TMP7]], i8 [[TMP8]], i32 4
+; OPT-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[IN]], i64 5
+; OPT-NEXT: [[TMP11:%.*]] = insertelement <128 x i8> [[TMP9]], i8 [[TMP10]], i32 5
+; OPT-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[IN]], i64 6
+; OPT-NEXT: [[TMP13:%.*]] = insertelement <128 x i8> [[TMP11]], i8 [[TMP12]], i32 6
+; OPT-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[IN]], i64 7
+; OPT-NEXT: [[TMP15:%.*]] = insertelement <128 x i8> [[TMP13]], i8 [[TMP14]], i32 7
+; OPT-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[IN]], i64 8
+; OPT-NEXT: [[TMP17:%.*]] = insertelement <128 x i8> [[TMP15]], i8 [[TMP16]], i32 8
+; OPT-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[IN]], i64 9
+; OPT-NEXT: [[TMP19:%.*]] = insertelement <128 x i8> [[TMP17]], i8 [[TMP18]], i32 9
+; OPT-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[IN]], i64 10
+; OPT-NEXT: [[TMP21:%.*]] = insertelement <128 x i8> [[TMP19]], i8 [[TMP20]], i32 10
+; OPT-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[IN]], i64 11
+; OPT-NEXT: [[TMP23:%.*]] = insertelement <128 x i8> [[TMP21]], i8 [[TMP22]], i32 11
+; OPT-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[IN]], i64 12
+; OPT-NEXT: [[TMP25:%.*]] = insertelement <128 x i8> [[TMP23]], i8 [[TMP24]], i32 12
+; OPT-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[IN]], i64 13
+; OPT-NEXT: [[TMP27:%.*]] = insertelement <128 x i8> [[TMP25]], i8 [[TMP26]], i32 13
+; OPT-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[IN]], i64 14
+; OPT-NEXT: [[TMP29:%.*]] = insertelement <128 x i8> [[TMP27]], i8 [[TMP28]], i32 14
+; OPT-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[IN]], i64 15
+; OPT-NEXT: [[TMP31:%.*]] = insertelement <128 x i8> [[TMP29]], i8 [[TMP30]], i32 15
+; OPT-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[IN]], i64 0
+; OPT-NEXT: [[TMP33:%.*]] = insertelement <128 x i8> [[TMP31]], i8 [[TMP32]], i32 0
+; OPT-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[IN]], i64 1
+; OPT-NEXT: [[TMP35:%.*]] = insertelement <128 x i8> [[TMP33]], i8 [[TMP34]], i32 1
+; OPT-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[IN]], i64 2
+; OPT-NEXT: [[TMP37:%.*]] = insertelement <128 x i8> [[TMP35]], i8 [[TMP36]], i32 2
+; OPT-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[IN]], i64 3
+; OPT-NEXT: [[TMP39:%.*]] = insertelement <128 x i8> [[TMP37]], i8 [[TMP38]], i32 3
+; OPT-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[IN]], i64 4
+; OPT-NEXT: [[TMP41:%.*]] = insertelement <128 x i8> [[TMP39]], i8 [[TMP40]], i32 4
+; OPT-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[IN]], i64 5
+; OPT-NEXT: [[TMP43:%.*]] = insertelement <128 x i8> [[TMP41]], i8 [[TMP42]], i32 5
+; OPT-NEXT: [[TMP44:%.*]] = extractelement <16 x i8> [[IN]], i64 6
+; OPT-NEXT: [[TMP45:%.*]] = insertelement <128 x i8> [[TMP43]], i8 [[TMP44]], i32 6
+; OPT-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[IN]], i64 7
+; OPT-NEXT: [[TMP47:%.*]] = insertelement <128 x i8> [[TMP45]], i8 [[TMP46]], i32 7
+; OPT-NEXT: [[TMP48:%.*]] = extractelement <16 x i8> [[IN]], i64 8
+; OPT-NEXT: [[TMP49:%.*]] = insertelement <128 x i8> [[TMP47]], i8 [[TMP48]], i32 8
+; OPT-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[IN]], i64 9
+; OPT-NEXT: [[TMP51:%.*]] = insertelement <128 x i8> [[TMP49]], i8 [[TMP50]], i32 9
+; OPT-NEXT: [[TMP52:%.*]] = extractelement <16 x i8> [[IN]], i64 10
+; OPT-NEXT: [[TMP53:%.*]] = insertelement <128 x i8> [[TMP51]], i8 [[TMP52]], i32 10
+; OPT-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[IN]], i64 11
+; OPT-NEXT: [[TMP55:%.*]] = insertelement <128 x i8> [[TMP53]], i8 [[TMP54]], i32 11
+; OPT-NEXT: [[TMP56:%.*]] = extractelement <16 x i8> [[IN]], i64 12
+; OPT-NEXT: [[TMP57:%.*]] = insertelement <128 x i8> [[TMP55]], i8 [[TMP56]], i32 12
+; OPT-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[IN]], i64 13
+; OPT-NEXT: [[TMP59:%.*]] = insertelement <128 x i8> [[TMP57]], i8 [[TMP58]], i32 13
+; OPT-NEXT: [[TMP60:%.*]] = extractelement <16 x i8> [[IN]], i64 14
+; OPT-NEXT: [[TMP61:%.*]] = insertelement <128 x i8> [[TMP59]], i8 [[TMP60]], i32 14
+; OPT-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[IN]], i64 15
+; OPT-NEXT: [[TMP63:%.*]] = insertelement <128 x i8> [[TMP61]], i8 [[TMP62]], i32 15
+; OPT-NEXT: [[TMP64:%.*]] = extractelement <16 x i8> [[IN]], i64 0
+; OPT-NEXT: [[TMP65:%.*]] = insertelement <128 x i8> [[TMP63]], i8 [[TMP64]], i32 0
+; OPT-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[IN]], i64 1
+; OPT-NEXT: [[TMP67:%.*]] = insertelement <128 x i8> [[TMP65]], i8 [[TMP66]], i32 1
+; OPT-NEXT: [[TMP68:%.*]] = extractelement <16 x i8> [[IN]], i64 2
+; OPT-NEXT: [[TMP69:%.*]] = insertelement <128 x i8> [[TMP67]], i8 [[TMP68]], i32 2
+; OPT-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[IN]], i64 3
+; OPT-NEXT: [[TMP71:%.*]] = insertelement <128 x i8> [[TMP69]], i8 [[TMP70]], i32 3
+; OPT-NEXT: [[TMP72:%.*]] = extractelement <16 x i8> [[IN]], i64 4
+; OPT-NEXT: [[TMP73:%.*]] = insertelement <128 x i8> [[TMP71]], i8 [[TMP72]], i32 4
+; OPT-NEXT: [[TMP74:%.*]] = extractelement <16 x i8> [[IN]], i64 5
+; OPT-NEXT: [[TMP75:%.*]] = insertelement <128 x i8> [[TMP73]], i8 [[TMP74]], i32 5
+; OPT-NEXT: [[TMP76:%.*]] = extractelement <16 x i8> [[IN]], i64 6
+; OPT-NEXT: [[TMP77:%.*]] = insertelement <128 x i8> [[TMP75]], i8 [[TMP76]], i32 6
+; OPT-NEXT: [[TMP78:%.*]] = extractelement <16 x i8> [[IN]], i64 7
+; OPT-NEXT: [[TMP79:%.*]] = insertelement <128 x i8> [[TMP77]], i8 [[TMP78]], i32 7
+; OPT-NEXT: [[TMP80:%.*]] = extractelement <16 x i8> [[IN]], i64 8
+; OPT-NEXT: [[TMP81:%.*]] = insertelement <128 x i8> [[TMP79]], i8 [[TMP80]], i32 8
+; OPT-NEXT: [[TMP82:%.*]] = extractelement <16 x i8> [[IN]], i64 9
+; OPT-NEXT: [[TMP83:%.*]] = insertelement <128 x i8> [[TMP81]], i8 [[TMP82]], i32 9
+; OPT-NEXT: [[TMP84:%.*]] = extractelement <16 x i8> [[IN]], i64 10
+; OPT-NEXT: [[TMP85:%.*]] = insertelement <128 x i8> [[TMP83]], i8 [[TMP84]], i32 10
+; OPT-NEXT: [[TMP86:%.*]] = extractelement <16 x i8> [[IN]], i64 11
+; OPT-NEXT: [[TMP87:%.*]] = insertelement <128 x i8> [[TMP85]], i8 [[TMP86]], i32 11
+; OPT-NEXT: [[TMP88:%.*]] = extractelement <16 x i8> [[IN]], i64 12
+; OPT-NEXT: [[TMP89:%.*]] = insertelement <128 x i8> [[TMP87]], i8 [[TMP88]], i32 12
+; OPT-NEXT: [[TMP90:%.*]] = extractelement <16 x i8> [[IN]], i64 13
+; OPT-NEXT: [[TMP91:%.*]] = insertelement <128 x i8> [[TMP89]], i8 [[TMP90]], i32 13
+; OPT-NEXT: [[TMP92:%.*]] = extractelement <16 x i8> [[IN]], i64 14
+; OPT-NEXT: [[TMP93:%.*]] = insertelement <128 x i8> [[TMP91]], i8 [[TMP92]], i32 14
+; OPT-NEXT: [[TMP94:%.*]] = extractelement <16 x i8> [[IN]], i64 15
+; OPT-NEXT: [[TMP95:%.*]] = insertelement <128 x i8> [[TMP93]], i8 [[TMP94]], i32 15
+; OPT-NEXT: [[TMP96:%.*]] = extractelement <16 x i8> [[IN]], i64 0
+; OPT-NEXT: [[TMP97:%.*]] = insertelement <128 x i8> [[TMP95]], i8 [[TMP96]], i32 0
+; OPT-NEXT: [[TMP98:%.*]] = extractelement <16 x i8> [[IN]], i64 1
+; OPT-NEXT: [[TMP99:%.*]] = insertelement <128 x i8> [[TMP97]], i8 [[TMP98]], i32 1
+; OPT-NEXT: [[TMP100:%.*]] = extractelement <16 x i8> [[IN]], i64 2
+; OPT-NEXT: [[TMP101:%.*]] = insertelement <128 x i8> [[TMP99]], i8 [[TMP100]], i32 2
+; OPT-NEXT: [[TMP102:%.*]] = extractelement <16 x i8> [[IN]], i64 3
+; OPT-NEXT: [[TMP103:%.*]] = insertelement <128 x i8> [[TMP101]], i8 [[TMP102]], i32 3
+; OPT-NEXT: [[TMP104:%.*]] = extractelement <16 x i8> [[IN]], i64 4
+; OPT-NEXT: [[TMP105:%.*]] = insertelement <128 x i8> [[TMP103]], i8 [[TMP104]], i32 4
+; OPT-NEXT: [[TMP106:%.*]] = extractelement <16 x i8> [[IN]], i64 5
+; OPT-NEXT: [[TMP107:%.*]] = insertelement <128 x i8> [[TMP105]], i8 [[TMP106]], i32 5
+; OPT-NEXT: [[TMP108:%.*]] = extractelement <16 x i8> [[IN]], i64 6
+; OPT-NEXT: [[TMP109:%.*]] = insertelement <128 x i8> [[TMP107]], i8 [[TMP108]], i32 6
+; OPT-NEXT: [[TMP110:%.*]] = extractelement <16 x i8> [[IN]], i64 7
+; OPT-NEXT: [[TMP111:%.*]] = insertelement <128 x i8> [[TMP109]], i8 [[TMP110]], i32 7
+; OPT-NEXT: [[TMP112:%.*]] = extractelement <16 x i8> [[IN]], i64 8
+; OPT-NEXT: [[TMP113:%.*]] = insertelement <128 x i8> [[TMP111]], i8 [[TMP112]], i32 8
+; OPT-NEXT: [[TMP114:%.*]] = extractelement <16 x i8> [[IN]], i64 9
+; OPT-NEXT: [[TMP115:%.*]] = insertelement <128 x i8> [[TMP113]], i8 [[TMP114]], i32 9
+; OPT-NEXT: [[TMP116:%.*]] = extractelement <16 x i8> [[IN]], i64 10
+; OPT-NEXT: [[TMP117:%.*]] = insertelement <128 x i8> [[TMP115]], i8 [[TMP116]], i32 10
+; OPT-NEXT: [[TMP118:%.*]] = extractelement <16 x i8> [[IN]], i64 11
+; OPT-NEXT: [[TMP119:%.*]] = insertelement <128 x i8> [[TMP117]], i8 [[TMP118]], i32 11
+; OPT-NEXT: [[TMP120:%.*]] = extractelement <16 x i8> [[IN]], i64 12
+; OPT-NEXT: [[TMP121:%.*]] = insertelement <128 x i8> [[TMP119]], i8 [[TMP120]], i32 12
+; OPT-NEXT: [[TMP122:%.*]] = extractelement <16 x i8> [[IN]], i64 13
+; OPT-NEXT: [[TMP123:%.*]] = insertelement <128 x i8> [[TMP121]], i8 [[TMP122]], i32 13
+; OPT-NEXT: [[TMP124:%.*]] = extractelement <16 x i8> [[IN]], i64 14
+; OPT-NEXT: [[TMP125:%.*]] = insertelement <128 x i8> [[TMP123]], i8 [[TMP124]], i32 14
+; OPT-NEXT: [[TMP126:%.*]] = extractelement <16 x i8> [[IN]], i64 15
+; OPT-NEXT: [[TMP127:%.*]] = insertelement <128 x i8> [[TMP125]], i8 [[TMP126]], i32 15
+; OPT-NEXT: [[TMP128:%.*]] = extractelement <16 x i8> [[IN]], i64 0
+; OPT-NEXT: [[TMP129:%.*]] = insertelement <128 x i8> [[TMP127]], i8 [[TMP128]], i32 0
+; OPT-NEXT: [[TMP130:%.*]] = extractelement <16 x i8> [[IN]], i64 1
+; OPT-NEXT: [[TMP131:%.*]] = insertelement <128 x i8> [[TMP129]], i8 [[TMP130]], i32 1
+; OPT-NEXT: [[TMP132:%.*]] = extractelement <16 x i8> [[IN]], i64 2
+; OPT-NEXT: [[TMP133:%.*]] = insertelement <128 x i8> [[TMP131]], i8 [[TMP132]], i32 2
+; OPT-NEXT: [[TMP134:%.*]] = extractelement <16 x i8> [[IN]], i64 3
+; OPT-NEXT: [[TMP135:%.*]] = insertelement <128 x i8> [[TMP133]], i8 [[TMP134]], i32 3
+; OPT-NEXT: [[TMP136:%.*]] = extractelement <16 x i8> [[IN]], i64 4
+; OPT-NEXT: [[TMP137:%.*]] = insertelement <128 x i8> [[TMP135]], i8 [[TMP136]], i32 4
+; OPT-NEXT: [[TMP138:%.*]] = extractelement <16 x i8> [[IN]], i64 5
+; OPT-NEXT: [[TMP139:%.*]] = insertelement <128 x i8> [[TMP137]], i8 [[TMP138]], i32 5
+; OPT-NEXT: [[TMP140:%.*]] = extractelement <16 x i8> [[IN]], i64 6
+; OPT-NEXT: [[TMP141:%.*]] = insertelement <128 x i8> [[TMP139]], i8 [[TMP140]], i32 6
+; OPT-NEXT: [[TMP142:%.*]] = extractelement <16 x i8> [[IN]], i64 7
+; OPT-NEXT: [[TMP143:%.*]] = insertelement <128 x i8> [[TMP141]], i8 [[TMP142]], i32 7
+; OPT-NEXT: [[TMP144:%.*]] = extractelement <16 x i8> [[IN]], i64 8
+; OPT-NEXT: [[TMP145:%.*]] = insertelement <128 x i8> [[TMP143]], i8 [[TMP144]], i32 8
+; OPT-NEXT: [[TMP146:%.*]] = extractelement <16 x i8> [[IN]], i64 9
+; OPT-NEXT: [[TMP147:%.*]] = insertelement <128 x i8> [[TMP145]], i8 [[TMP146]], i32 9
+; OPT-NEXT: [[TMP148:%.*]] = extractelement <16 x i8> [[IN]], i64 10
+; OPT-NEXT: [[TMP149:%.*]] = insertelement <128 x i8> [[TMP147]], i8 [[TMP148]], i32 10
+; OPT-NEXT: [[TMP150:%.*]] = extractelement <16 x i8> [[IN]], i64 11
+; OPT-NEXT: [[TMP151:%.*]] = insertelement <128 x i8> [[TMP149]], i8 [[TMP150]], i32 11
+; OPT-NEXT: [[TMP152:%.*]] = extractelement <16 x i8> [[IN]], i64 12
+; OPT-NEXT: [[TMP153:%.*]] = insertelement <128 x i8> [[TMP151]], i8 [[TMP152]], i32 12
+; OPT-NEXT: [[TMP154:%.*]] = extractelement <16 x i8> [[IN]], i64 13
+; OPT-NEXT: [[TMP155:%.*]] = insertelement <128 x i8> [[TMP153]], i8 [[TMP154]], i32 13
+; OPT-NEXT: [[TMP156:%.*]] = extractelement <16 x i8> [[IN]], i64 14
+; OPT-NEXT: [[TMP157:%.*]] = insertelement <128 x i8> [[TMP155]], i8 [[TMP156]], i32 14
+; OPT-NEXT: [[TMP158:%.*]] = extractelement <16 x i8> [[IN]], i64 15
+; OPT-NEXT: [[TMP159:%.*]] = insertelement <128 x i8> [[TMP157]], i8 [[TMP158]], i32 15
+; OPT-NEXT: [[TMP160:%.*]] = extractelement <16 x i8> [[IN]], i64 0
+; OPT-NEXT: [[TMP161:%.*]] = insertelement <128 x i8> [[TMP159]], i8 [[TMP160]], i32 0
+; OPT-NEXT: [[TMP162:%.*]] = extractelement <16 x i8> [[IN]], i64 1
+; OPT-NEXT: [[TMP163:%.*]] = insertelement <128 x i8> [[TMP161]], i8 [[TMP162]], i32 1
+; OPT-NEXT: [[TMP164:%.*]] = extractelement <16 x i8> [[IN]], i64 2
+; OPT-NEXT: [[TMP165:%.*]] = insertelement <128 x i8> [[TMP163]], i8 [[TMP164]], i32 2
+; OPT-NEXT: [[TMP166:%.*]] = extractelement <16 x i8> [[IN]], i64 3
+; OPT-NEXT: [[TMP167:%.*]] = insertelement <128 x i8> [[TMP165]], i8 [[TMP166]], i32 3
+; OPT-NEXT: [[TMP168:%.*]] = extractelement <16 x i8> [[IN]], i64 4
+; OPT-NEXT: [[TMP169:%.*]] = insertelement <128 x i8> [[TMP167]], i8 [[TMP168]], i32 4
+; OPT-NEXT: [[TMP170:%.*]] = extractelement <16 x i8> [[IN]], i64 5
+; OPT-NEXT: [[TMP171:%.*]] = insertelement <128 x i8> [[TMP169]], i8 [[TMP170]], i32 5
+; OPT-NEXT: [[TMP172:%.*]] = extractelement <16 x i8> [[IN]], i64 6
+; OPT-NEXT: [[TMP173:%.*]] = insertelement <128 x i8> [[TMP171]], i8 [[TMP172]], i32 6
+; OPT-NEXT: [[TMP174:%.*]] = extractelement <16 x i8> [[IN]], i64 7
+; OPT-NEXT: [[TMP175:%.*]] = insertelement <128 x i8> [[TMP173]], i8 [[TMP174]], i32 7
+; OPT-NEXT: [[TMP176:%.*]] = extractelement <16 x i8> [[IN]], i64 8
+; OPT-NEXT: [[TMP177:%.*]] = insertelement <128 x i8> [[TMP175]], i8 [[TMP176]], i32 8
+; OPT-NEXT: [[TMP178:%.*]] = extractelement <16 x i8> [[IN]], i64 9
+; OPT-NEXT: [[TMP179:%.*]] = insertelement <128 x i8> [[TMP177]], i8 [[TMP178]], i32 9
+; OPT-NEXT: [[TMP180:%.*]] = extractelement <16 x i8> [[IN]], i64 10
+; OPT-NEXT: [[TMP181:%.*]] = insertelement <128 x i8> [[TMP179]], i8 [[TMP180]], i32 10
+; OPT-NEXT: [[TMP182:%.*]] = extractelement <16 x i8> [[IN]], i64 11
+; OPT-NEXT: [[TMP183:%.*]] = insertelement <128 x i8> [[TMP181]], i8 [[TMP182]], i32 11
+; OPT-NEXT: [[TMP184:%.*]] = extractelement <16 x i8> [[IN]], i64 12
+; OPT-NEXT: [[TMP185:%.*]] = insertelement <128 x i8> [[TMP183]], i8 [[TMP184]], i32 12
+; OPT-NEXT: [[TMP186:%.*]] = extractelement <16 x i8> [[IN]], i64 13
+; OPT-NEXT: [[TMP187:%.*]] = insertelement <128 x i8> [[TMP185]], i8 [[TMP186]], i32 13
+; OPT-NEXT: [[TMP188:%.*]] = extractelement <16 x i8> [[IN]], i64 14
+; OPT-NEXT: [[TMP189:%.*]] = insertelement <128 x i8> [[TMP187]], i8 [[TMP188]], i32 14
+; OPT-NEXT: [[TMP190:%.*]] = extractelement <16 x i8> [[IN]], i64 15
+; OPT-NEXT: [[TMP191:%.*]] = insertelement <128 x i8> [[TMP189]], i8 [[TMP190]], i32 15
+; OPT-NEXT: [[TMP192:%.*]] = extractelement <16 x i8> [[IN]], i64 0
+; OPT-NEXT: [[TMP193:%.*]] = insertelement <128 x i8> [[TMP191]], i8 [[TMP192]], i32 0
+; OPT-NEXT: [[TMP194:%.*]] = extractelement <16 x i8> [[IN]], i64 1
+; OPT-NEXT: [[TMP195:%.*]] = insertelement <128 x i8> [[TMP193]], i8 [[TMP194]], i32 1
+; OPT-NEXT: [[TMP196:%.*]] = extractelement <16 x i8> [[IN]], i64 2
+; OPT-NEXT: [[TMP197:%.*]] = insertelement <128 x i8> [[TMP195]], i8 [[TMP196]], i32 2
+; OPT-NEXT: [[TMP198:%.*]] = extractelement <16 x i8> [[IN]], i64 3
+; OPT-NEXT: [[TMP199:%.*]] = insertelement <128 x i8> [[TMP197]], i8 [[TMP198]], i32 3
+; OPT-NEXT: [[TMP200:%.*]] = extractelement <16 x i8> [[IN]], i64 4
+; OPT-NEXT: [[TMP201:%.*]] = insertelement <128 x i8> [[TMP199]], i8 [[TMP200]], i32 4
+; OPT-NEXT: [[TMP202:%.*]] = extractelement <16 x i8> [[IN]], i64 5
+; OPT-NEXT: [[TMP203:%.*]] = insertelement <128 x i8> [[TMP201]], i8 [[TMP202]], i32 5
+; OPT-NEXT: [[TMP204:%.*]] = extractelement <16 x i8> [[IN]], i64 6
+; OPT-NEXT: [[TMP205:%.*]] = insertelement <128 x i8> [[TMP203]], i8 [[TMP204]], i32 6
+; OPT-NEXT: [[TMP206:%.*]] = extractelement <16 x i8> [[IN]], i64 7
+; OPT-NEXT: [[TMP207:%.*]] = insertelement <128 x i8> [[TMP205]], i8 [[TMP206]], i32 7
+; OPT-NEXT: [[TMP208:%.*]] = extractelement <16 x i8> [[IN]], i64 8
+; OPT-NEXT: [[TMP209:%.*]] = insertelement <128 x i8> [[TMP207]], i8 [[TMP208]], i32 8
+; OPT-NEXT: [[TMP210:%.*]] = extractelement <16 x i8> [[IN]], i64 9
+; OPT-NEXT: [[TMP211:%.*]] = insertelement <128 x i8> [[TMP209]], i8 [[TMP210]], i32 9
+; OPT-NEXT: [[TMP212:%.*]] = extractelement <16 x i8> [[IN]], i64 10
+; OPT-NEXT: [[TMP213:%.*]] = insertelement <128 x i8> [[TMP211]], i8 [[TMP212]], i32 10
+; OPT-NEXT: [[TMP214:%.*]] = extractelement <16 x i8> [[IN]], i64 11
+; OPT-NEXT: [[TMP215:%.*]] = insertelement <128 x i8> [[TMP213]], i8 [[TMP214]], i32 11
+; OPT-NEXT: [[TMP216:%.*]] = extractelement <16 x i8> [[IN]], i64 12
+; OPT-NEXT: [[TMP217:%.*]] = insertelement <128 x i8> [[TMP215]], i8 [[TMP216]], i32 12
+; OPT-NEXT: [[TMP218:%.*]] = extractelement <16 x i8> [[IN]], i64 13
+; OPT-NEXT: [[TMP219:%.*]] = insertelement <128 x i8> [[TMP217]], i8 [[TMP218]], i32 13
+; OPT-NEXT: [[TMP220:%.*]] = extractelement <16 x i8> [[IN]], i64 14
+; OPT-NEXT: [[TMP221:%.*]] = insertelement <128 x i8> [[TMP219]], i8 [[TMP220]], i32 14
+; OPT-NEXT: [[TMP222:%.*]] = extractelement <16 x i8> [[IN]], i64 15
+; OPT-NEXT: [[TMP223:%.*]] = insertelement <128 x i8> [[TMP221]], i8 [[TMP222]], i32 15
+; OPT-NEXT: [[TMP224:%.*]] = extractelement <16 x i8> [[IN]], i64 0
+; OPT-NEXT: [[TMP225:%.*]] = insertelement <128 x i8> [[TMP223]], i8 [[TMP224]], i32 0
+; OPT-NEXT: [[TMP226:%.*]] = extractelement <16 x i8> [[IN]], i64 1
+; OPT-NEXT: [[TMP227:%.*]] = insertelement <128 x i8> [[TMP225]], i8 [[TMP226]], i32 1
+; OPT-NEXT: [[TMP228:%.*]] = extractelement <16 x i8> [[IN]], i64 2
+; OPT-NEXT: [[TMP229:%.*]] = insertelement <128 x i8> [[TMP227]], i8 [[TMP228]], i32 2
+; OPT-NEXT: [[TMP230:%.*]] = extractelement <16 x i8> [[IN]], i64 3
+; OPT-NEXT: [[TMP231:%.*]] = insertelement <128 x i8> [[TMP229]], i8 [[TMP230]], i32 3
+; OPT-NEXT: [[TMP232:%.*]] = extractelement <16 x i8> [[IN]], i64 4
+; OPT-NEXT: [[TMP233:%.*]] = insertelement <128 x i8> [[TMP231]], i8 [[TMP232]], i32 4
+; OPT-NEXT: [[TMP234:%.*]] = extractelement <16 x i8> [[IN]], i64 5
+; OPT-NEXT: [[TMP235:%.*]] = insertelement <128 x i8> [[TMP233]], i8 [[TMP234]], i32 5
+; OPT-NEXT: [[TMP236:%.*]] = extractelement <16 x i8> [[IN]], i64 6
+; OPT-NEXT: [[TMP237:%.*]] = insertelement <128 x i8> [[TMP235]], i8 [[TMP236]], i32 6
+; OPT-NEXT: [[TMP238:%.*]] = extractelement <16 x i8> [[IN]], i64 7
+; OPT-NEXT: [[TMP239:%.*]] = insertelement <128 x i8> [[TMP237]], i8 [[TMP238]], i32 7
+; OPT-NEXT: [[TMP240:%.*]] = extractelement <16 x i8> [[IN]], i64 8
+; OPT-NEXT: [[TMP241:%.*]] = insertelement <128 x i8> [[TMP239]], i8 [[TMP240]], i32 8
+; OPT-NEXT: [[TMP242:%.*]] = extractelement <16 x i8> [[IN]], i64 9
+; OPT-NEXT: [[TMP243:%.*]] = insertelement <128 x i8> [[TMP241]], i8 [[TMP242]], i32 9
+; OPT-NEXT: [[TMP244:%.*]] = extractelement <16 x i8> [[IN]], i64 10
+; OPT-NEXT: [[TMP245:%.*]] = insertelement <128 x i8> [[TMP243]], i8 [[TMP244]], i32 10
+; OPT-NEXT: [[TMP246:%.*]] = extractelement <16 x i8> [[IN]], i64 11
+; OPT-NEXT: [[TMP247:%.*]] = insertelement <128 x i8> [[TMP245]], i8 [[TMP246]], i32 11
+; OPT-NEXT: [[TMP248:%.*]] = extractelement <16 x i8> [[IN]], i64 12
+; OPT-NEXT: [[TMP249:%.*]] = insertelement <128 x i8> [[TMP247]], i8 [[TMP248]], i32 12
+; OPT-NEXT: [[TMP250:%.*]] = extractelement <16 x i8> [[IN]], i64 13
+; OPT-NEXT: [[TMP251:%.*]] = insertelement <128 x i8> [[TMP249]], i8 [[TMP250]], i32 13
+; OPT-NEXT: [[TMP252:%.*]] = extractelement <16 x i8> [[IN]], i64 14
+; OPT-NEXT: [[TMP253:%.*]] = insertelement <128 x i8> [[TMP251]], i8 [[TMP252]], i32 14
+; OPT-NEXT: [[TMP254:%.*]] = extractelement <16 x i8> [[IN]], i64 15
+; OPT-NEXT: [[TMP255:%.*]] = insertelement <128 x i8> [[TMP253]], i8 [[TMP254]], i32 15
+; OPT-NEXT: [[TMP256:%.*]] = extractelement <128 x i8> [[TMP255]], i32 80
+; OPT-NEXT: [[TMP257:%.*]] = insertelement <16 x i8> poison, i8 [[TMP256]], i64 0
+; OPT-NEXT: [[TMP258:%.*]] = extractelement <128 x i8> [[TMP255]], i32 81
+; OPT-NEXT: [[TMP259:%.*]] = insertelement <16 x i8> [[TMP257]], i8 [[TMP258]], i64 1
+; OPT-NEXT: [[TMP260:%.*]] = extractelement <128 x i8> [[TMP255]], i32 82
+; OPT-NEXT: [[TMP261:%.*]] = insertelement <16 x i8> [[TMP259]], i8 [[TMP260]], i64 2
+; OPT-NEXT: [[TMP262:%.*]] = extractelement <128 x i8> [[TMP255]], i32 83
+; OPT-NEXT: [[TMP263:%.*]] = insertelement <16 x i8> [[TMP261]], i8 [[TMP262]], i64 3
+; OPT-NEXT: [[TMP264:%.*]] = extractelement <128 x i8> [[TMP255]], i32 84
+; OPT-NEXT: [[TMP265:%.*]] = insertelement <16 x i8> [[TMP263]], i8 [[TMP264]], i64 4
+; OPT-NEXT: [[TMP266:%.*]] = extractelement <128 x i8> [[TMP255]], i32 85
+; OPT-NEXT: [[TMP267:%.*]] = insertelement <16 x i8> [[TMP265]], i8 [[TMP266]], i64 5
+; OPT-NEXT: [[TMP268:%.*]] = extractelement <128 x i8> [[TMP255]], i32 86
+; OPT-NEXT: [[TMP269:%.*]] = insertelement <16 x i8> [[TMP267]], i8 [[TMP268]], i64 6
+; OPT-NEXT: [[TMP270:%.*]] = extractelement <128 x i8> [[TMP255]], i32 87
+; OPT-NEXT: [[TMP271:%.*]] = insertelement <16 x i8> [[TMP269]], i8 [[TMP270]], i64 7
+; OPT-NEXT: [[TMP272:%.*]] = extractelement <128 x i8> [[TMP255]], i32 88
+; OPT-NEXT: [[TMP273:%.*]] = insertelement <16 x i8> [[TMP271]], i8 [[TMP272]], i64 8
+; OPT-NEXT: [[TMP274:%.*]] = extractelement <128 x i8> [[TMP255]], i32 89
+; OPT-NEXT: [[TMP275:%.*]] = insertelement <16 x i8> [[TMP273]], i8 [[TMP274]], i64 9
+; OPT-NEXT: [[TMP276:%.*]] = extractelement <128 x i8> [[TMP255]], i32 90
+; OPT-NEXT: [[TMP277:%.*]] = insertelement <16 x i8> [[TMP275]], i8 [[TMP276]], i64 10
+; OPT-NEXT: [[TMP278:%.*]] = extractelement <128 x i8> [[TMP255]], i32 91
+; OPT-NEXT: [[TMP279:%.*]] = insertelement <16 x i8> [[TMP277]], i8 [[TMP278]], i64 11
+; OPT-NEXT: [[TMP280:%.*]] = extractelement <128 x i8> [[TMP255]], i32 92
+; OPT-NEXT: [[TMP281:%.*]] = insertelement <16 x i8> [[TMP279]], i8 [[TMP280]], i64 12
+; OPT-NEXT: [[TMP282:%.*]] = extractelement <128 x i8> [[TMP255]], i32 93
+; OPT-NEXT: [[TMP283:%.*]] = insertelement <16 x i8> [[TMP281]], i8 [[TMP282]], i64 13
+; OPT-NEXT: [[TMP284:%.*]] = extractelement <128 x i8> [[TMP255]], i32 94
+; OPT-NEXT: [[TMP285:%.*]] = insertelement <16 x i8> [[TMP283]], i8 [[TMP284]], i64 14
+; OPT-NEXT: [[TMP286:%.*]] = extractelement <128 x i8> [[TMP255]], i32 95
+; OPT-NEXT: [[TMP287:%.*]] = insertelement <16 x i8> [[TMP285]], i8 [[TMP286]], i64 15
+; OPT-NEXT: [[SUM:%.*]] = add <16 x i8> [[TMP287]], [[ADD]]
+; OPT-NEXT: store <16 x i8> [[SUM]], ptr addrspace(3) [[OUT]], align 16
+; OPT-NEXT: ret void
+;
+entry:
+ %alloca = alloca [8 x <16 x i8>], align 16, addrspace(5)
+ %gep0 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 0
+ store <16 x i8> %in, ptr addrspace(5) %gep0, align 16
+ %gep1 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 1
+ store <16 x i8> %in, ptr addrspace(5) %gep0, align 16
+ %gep2 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 2
+ store <16 x i8> %in, ptr addrspace(5) %gep0, align 16
+ %gep3 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 3
+ store <16 x i8> %in, ptr addrspace(5) %gep0, align 16
+ %gep4 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 4
+ store <16 x i8> %in, ptr addrspace(5) %gep0, align 16
+ %gep5 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 5
+ store <16 x i8> %in, ptr addrspace(5) %gep0, align 16
+ %gep6 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 6
+ store <16 x i8> %in, ptr addrspace(5) %gep0, align 16
+ %gep7 = getelementptr [8 x <16 x i8>], ptr addrspace(5) %alloca, i64 0, i64 7
+ store <16 x i8> %in, ptr addrspace(5) %gep0, align 16
+ %load = load <16 x i8>, ptr addrspace(5) %gep5, align 16
+ %sum = add <16 x i8> %load, %add
+ store <16 x i8> %sum, ptr addrspace(3) %out, align 16
+ ret void
+}
+
+attributes #0 = {"amdgpu-waves-per-eu"="2,2"}
diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
index f67cbe3..ddb522a8 100644
--- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
+++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
@@ -1,17 +1,17 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s
; -global-isel=1 SI run line skipped since store not yet implemented.
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=MEMTIME -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=MEMTIME -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=MEMTIME -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s
declare i64 @llvm.readcyclecounter() #0
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
index c573253..48ed5c4 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
@@ -73,10 +73,10 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
}
; CHECK-LABEL: {{^}}excess_soft_clause_reg_pressure:
-; GFX908: NumSgprs: 64
-; GFX908-GCNTRACKERS: NumSgprs: 64
+; GFX908: NumSgprs: 56
+; GFX908-GCNTRACKERS: NumSgprs: 56
; GFX908: NumVgprs: 43
-; GFX908-GCNTRACKERS: NumVgprs: 39
+; GFX908-GCNTRACKERS: NumVgprs: 40
; GFX908: Occupancy: 5
; GFX908-GCNTRACKERS: Occupancy: 6
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
index 586579f..ef96944 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
@@ -20,38 +20,33 @@ define void @test() {
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: .LBB0_3: ; %bb.3
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: ; implicit-def: $sgpr4
-; CHECK-NEXT: v_mov_b32_e32 v0, s4
-; CHECK-NEXT: v_readfirstlane_b32 s6, v0
; CHECK-NEXT: s_mov_b64 s[4:5], -1
-; CHECK-NEXT: s_mov_b32 s7, 0
-; CHECK-NEXT: s_cmp_eq_u32 s6, s7
; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v1, s4, 0
; CHECK-NEXT: v_writelane_b32 v1, s5, 1
-; CHECK-NEXT: s_mov_b64 s[10:11], exec
-; CHECK-NEXT: s_mov_b64 exec, -1
+; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
+; CHECK-NEXT: s_nop 0
; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[10:11]
+; CHECK-NEXT: s_mov_b64 exec, s[8:9]
; CHECK-NEXT: s_cbranch_scc1 .LBB0_5
; CHECK-NEXT: ; %bb.4: ; %bb.4
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
+; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[10:11]
+; CHECK-NEXT: s_mov_b64 exec, s[8:9]
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: v_writelane_b32 v1, s4, 0
; CHECK-NEXT: v_writelane_b32 v1, s5, 1
-; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
+; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[10:11]
+; CHECK-NEXT: s_mov_b64 exec, s[8:9]
; CHECK-NEXT: .LBB0_5: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
+; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[10:11]
+; CHECK-NEXT: s_mov_b64 exec, s[8:9]
; CHECK-NEXT: v_readlane_b32 s4, v1, 0
; CHECK-NEXT: v_readlane_b32 s5, v1, 1
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
index 5aafb0f..364598f 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@@ -31,8 +31,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8
; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4)
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: KILL undef %125:sgpr_128
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %117:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: KILL undef %117:sgpr_128
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, implicit-def dead $scc
; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
@@ -44,87 +44,85 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.71, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) poison`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 64, 0 :: (invariant load (s128) from %ir.88, addrspace 4)
; CHECK-NEXT: KILL undef %74:sreg_64
; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
- ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %112:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %87:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: KILL undef %89:sgpr_128
- ; CHECK-NEXT: KILL undef %118:sgpr_128
+ ; CHECK-NEXT: KILL undef %112:sgpr_128
+ ; CHECK-NEXT: KILL undef %87:sgpr_128
; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4)
- ; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1
+ ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %148:sreg_32, 31, implicit-def dead $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %148:sreg_32, implicit-def $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.77, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.83, addrspace 4)
; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1
- ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %148:sreg_32, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %302:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %279:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, undef %358:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %368:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4)
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %352:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %363:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %334:sgpr_128, undef %335:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %345:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.95, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 0, 0 :: (invariant load (s128) from %ir.100, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.105, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4)
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %329:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %340:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %384:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %361:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 224, 0 :: (invariant load (s128) from %ir.117, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.133, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 576, 0 :: (invariant load (s128) from %ir.138, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.122, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 224, 0 :: (invariant load (s128) from %ir.128, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc
@@ -135,49 +133,49 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_17]], 168, 0 :: (invariant load (s32) from %ir.260, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.145, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 0, 0 :: (invariant load (s128) from %ir.158, addrspace 4)
; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1
; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.166, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.171, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s64) from %ir.269, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.193, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.199, addrspace 4)
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.221, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.204, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.209, addrspace 4)
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]]
@@ -189,30 +187,30 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.280, addrspace 4)
; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %470:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
- ; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1
- ; CHECK-NEXT: KILL undef %470:sreg_64
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 160, 0 :: (invariant load (s128) from %ir.244, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %443:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
+ ; CHECK-NEXT: KILL [[S_ADD_U32_15]].sub0, [[S_ADD_U32_15]].sub1
; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3
+ ; CHECK-NEXT: KILL undef %443:sreg_64
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.252, addrspace 4)
; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s32) from %ir.291, align 8, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
+ ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]]
@@ -224,22 +222,22 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4)
- ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.329, addrspace 4)
- ; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.335, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_21]], 96, 0 :: (invariant load (s128) from %ir.309, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.315, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.321, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]]
- ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
+ ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
+ ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
@@ -351,13 +349,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec
- ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %543:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %516:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec
; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec
; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %557:vgpr_32, undef %559:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
+ ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %530:vgpr_32, undef %532:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
; CHECK-NEXT: S_ENDPGM 0
.expVert:
%0 = extractelement <31 x i32> %userData, i64 2
diff --git a/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll b/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll
new file mode 100644
index 0000000..22e4a24
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
+
+define i16 @v_underflow_compare_fold_i16(i16 %a, i16 %b) #0 {
+; GFX9-LABEL: v_underflow_compare_fold_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u16_e32 v1, v0, v1
+; GFX9-NEXT: v_min_u16_e32 v0, v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_underflow_compare_fold_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u16 v0.h, v0.l, v1.l
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_u16 v0.l, v0.h, v0.l
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i16 %a, %b
+ %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
+ ret i16 %cond
+}
+
+define i32 @v_underflow_compare_fold_i32(i32 %a, i32 %b) #0 {
+; GFX9-LABEL: v_underflow_compare_fold_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
+; GFX9-NEXT: v_min_u32_e32 v0, v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_underflow_compare_fold_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_u32_e32 v0, v1, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i32 %a, %b
+ %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
+ ret i32 %cond
+}
+
+define i32 @v_underflow_compare_fold_i32_commute(i32 %a, i32 %b) #0 {
+; GFX9-LABEL: v_underflow_compare_fold_i32_commute:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
+; GFX9-NEXT: v_min_u32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_underflow_compare_fold_i32_commute:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_u32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i32 %a, %b
+ %cond = call i32 @llvm.umin.i32(i32 %a, i32 %sub)
+ ret i32 %cond
+}
+
+define i32 @v_underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) #0 {
+; GFX9-LABEL: v_underflow_compare_fold_i32_multi_use:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
+; GFX9-NEXT: v_min_u32_e32 v0, v1, v0
+; GFX9-NEXT: global_store_dword v[2:3], v1, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_underflow_compare_fold_i32_multi_use:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_u32_e32 v0, v1, v0
+; GFX11-NEXT: global_store_b32 v[2:3], v1, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i32 %a, %b
+ store i32 %sub, ptr addrspace(1) %ptr
+ %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
+ ret i32 %cond
+}
+
+define i64 @v_underflow_compare_fold_i64(i64 %a, i64 %b) #0 {
+; GFX9-LABEL: v_underflow_compare_fold_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_underflow_compare_fold_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
+; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i64 %a, %b
+ %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
+ ret i64 %cond
+}
+
+define i64 @v_underflow_compare_fold_i64_commute(i64 %a, i64 %b) #0 {
+; GFX9-LABEL: v_underflow_compare_fold_i64_commute:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_underflow_compare_fold_i64_commute:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
+; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i64 %a, %b
+ %cond = call i64 @llvm.umin.i64(i64 %a, i64 %sub)
+ ret i64 %cond
+}
+
+define i64 @v_underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) #0 {
+; GFX9-LABEL: v_underflow_compare_fold_i64_multi_use:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
+; GFX9-NEXT: global_store_dwordx2 v[4:5], v[2:3], off
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_underflow_compare_fold_i64_multi_use:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
+; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX11-NEXT: global_store_b64 v[4:5], v[2:3], off
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i64 %a, %b
+ store i64 %sub, ptr addrspace(1) %ptr
+ %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
+ ret i64 %cond
+}
+
+define amdgpu_ps i16 @s_underflow_compare_fold_i16(i16 inreg %a, i16 inreg %b) #0 {
+; GFX9-LABEL: s_underflow_compare_fold_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_sub_i32 s1, s0, s1
+; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX9-NEXT: s_min_u32 s0, s1, s0
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: s_underflow_compare_fold_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_sub_i32 s1, s0, s1
+; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_min_u32 s0, s1, s0
+; GFX11-NEXT: ; return to shader part epilog
+ %sub = sub i16 %a, %b
+ %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
+ ret i16 %cond
+}
+
+define amdgpu_ps i32 @s_underflow_compare_fold_i32(i32 inreg %a, i32 inreg %b) #0 {
+; GFX9-LABEL: s_underflow_compare_fold_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_sub_i32 s1, s0, s1
+; GFX9-NEXT: s_min_u32 s0, s1, s0
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: s_underflow_compare_fold_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_sub_i32 s1, s0, s1
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_min_u32 s0, s1, s0
+; GFX11-NEXT: ; return to shader part epilog
+ %sub = sub i32 %a, %b
+ %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
+ ret i32 %cond
+}
+
+define amdgpu_ps i64 @s_underflow_compare_fold_i64(i64 inreg %a, i64 inreg %b) #0 {
+; GFX9-LABEL: s_underflow_compare_fold_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_sub_u32 s2, s0, s2
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: s_subb_u32 s3, s1, s3
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec
+; GFX9-NEXT: s_cselect_b32 s1, s3, s1
+; GFX9-NEXT: s_cselect_b32 s0, s2, s0
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: s_underflow_compare_fold_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_sub_u32 s2, s0, s2
+; GFX11-NEXT: s_subb_u32 s3, s1, s3
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[2:3], s[0:1]
+; GFX11-NEXT: s_and_b32 s4, s4, exec_lo
+; GFX11-NEXT: s_cselect_b32 s0, s2, s0
+; GFX11-NEXT: s_cselect_b32 s1, s3, s1
+; GFX11-NEXT: ; return to shader part epilog
+ %sub = sub i64 %a, %b
+ %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
+ ret i64 %cond
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
index 8a70a8a..32cc398 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
@@ -36,7 +36,7 @@ body: |
; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/
$vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x8a
+ ; GCN-NEXT: s_set_vgpr_msb 0x458a
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/
$vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode
@@ -50,7 +50,7 @@ body: |
; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/
$vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xcf
+ ; GCN-NEXT: s_set_vgpr_msb 0x8acf
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
$vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index f508df2..7e1c28f 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -22,13 +22,13 @@ body: |
$vgpr257 = V_MOV_B32_e32 undef $vgpr510, implicit $exec
; Single bit change
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4101
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0
; GCN-NEXT: v_rcp_f32_e64 v255, v2 /*v258*/
$vgpr255 = V_RCP_F32_e64 0, undef $vgpr258, 0, 0, implicit $exec, implicit $mode
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_rcp_f32_e64 v255, v1
$vgpr255 = V_RCP_F32_e64 0, undef $vgpr1, 0, 0, implicit $exec, implicit $mode
@@ -40,7 +40,7 @@ body: |
; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/
$vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT: v_add_f32_e64 v2 /*v258*/, v0, v251 /*v507*/
@@ -48,7 +48,7 @@ body: |
; VOP3
- ; GCN-NEXT: s_set_vgpr_msb 0x55
+ ; GCN-NEXT: s_set_vgpr_msb 0x4455
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1
; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/
$vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode
@@ -58,32 +58,32 @@ body: |
$vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode
; Tuple crossing the 256 boundary
- ; GCN-NEXT: s_set_vgpr_msb 17
+ ; GCN-NEXT: s_set_vgpr_msb 0x5511
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1
; GCN-NEXT: v_mqsad_u32_u8 v[254:257], v[2:3] /*v[258:259]*/, v0, v[244:247] /*v[500:503]*/
$vgpr254_vgpr255_vgpr256_vgpr257 = V_MQSAD_U32_U8_e64 $vgpr258_vgpr259, $vgpr0, undef $vgpr500_vgpr501_vgpr502_vgpr503, 0, implicit $exec
; DPP/tied operand
- ; GCN-NEXT: s_set_vgpr_msb 0x45
+ ; GCN-NEXT: s_set_vgpr_msb 0x1145
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT: v_add_nc_u16_e64_dpp v0 /*v256*/, v1 /*v257*/, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
$vgpr256 = V_ADD_NC_U16_fake16_e64_dpp $vgpr256, 0, $vgpr257, 0, undef $vgpr258, 0, 0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 17
+ ; GCN-NEXT: s_set_vgpr_msb 0x4511
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1
; GCN-NEXT: v_add3_u32_e64_dpp v0, v1 /*v257*/, v0, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
$vgpr0 = V_ADD3_U32_e64_dpp $vgpr0, $vgpr257, $vgpr0, undef $vgpr258, 1, 15, 15, 1, implicit $exec
; DS (addr, data0, and data1 operands)
- ; GCN-NEXT: s_set_vgpr_msb 20
+ ; GCN-NEXT: s_set_vgpr_msb 0x1114
; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=1
; GCN-NEXT: ds_store_2addr_b32 v0, v248 /*v504*/, v249 /*v505*/ offset1:1
DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr504, undef $vgpr505, 0, 1, 0, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x1400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: ds_store_2addr_b32 v0, v248, v249 offset1:1
DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr248, undef $vgpr249, 0, 1, 0, implicit $exec
@@ -93,13 +93,13 @@ body: |
; GCN-NEXT: ds_load_b32 v0, v255 /*v511*/
$vgpr0 = DS_READ_B32_gfx9 $vgpr511, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x144
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: ds_add_rtn_u32 v255 /*v511*/, v0, v248 /*v504*/
$vgpr511 = DS_ADD_RTN_U32_gfx9 $vgpr0, undef $vgpr504, 0, 0, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: ds_add_rtn_u32 v0, v0, v0
$vgpr0 = DS_ADD_RTN_U32_gfx9 $vgpr0, $vgpr0, 0, 0, implicit $exec
@@ -111,17 +111,17 @@ body: |
; GCN-NEXT: global_load_b32 v2, v[2:3] /*v[258:259]*/, off
$vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr258_vgpr259, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 64
+ ; GCN-NEXT: s_set_vgpr_msb 0x140
; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0
; GCN-NEXT: global_load_b32 v255 /*v511*/, v0, s[0:1]
$vgpr511 = GLOBAL_LOAD_DWORD_SADDR undef $sgpr0_sgpr1, $vgpr0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4001
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0
; GCN-NEXT: scratch_load_u8 v0, v255 /*v511*/, s0
$vgpr0 = SCRATCH_LOAD_UBYTE_SVS $vgpr511, undef $sgpr0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_store_b32 v[0:1], v2, off
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -135,13 +135,13 @@ body: |
; GCN-NEXT: global_store_b96 v[0:1] /*v[256:257]*/, v[244:246] /*v[500:502]*/, off
GLOBAL_STORE_DWORDX3 $vgpr256_vgpr257, $vgpr500_vgpr501_vgpr502, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: flat_atomic_add_u32 v254 /*v510*/, v[0:1], v255 /*v511*/ th:TH_ATOMIC_RETURN
$vgpr510 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr511, 0, 1, implicit $exec, implicit $flat_scr
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: flat_atomic_add_u32 v0, v[0:1], v255 th:TH_ATOMIC_RETURN
$vgpr0 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr255, 0, 1, implicit $exec, implicit $flat_scr
@@ -156,12 +156,12 @@ body: |
; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0, s[8:11], s3 offen
$vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr0, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x41
+ ; GCN-NEXT: s_set_vgpr_msb 0x4041
; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0
; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0 /*v256*/, s[8:11], s3 offen
$vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr256, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: buffer_store_b32 v0, v1, s[0:3], s3 offen
BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec
@@ -171,7 +171,7 @@ body: |
; GCN-NEXT: buffer_store_b32 v0 /*v256*/, v1 /*v257*/, s[0:3], s3 offen
BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr256, $vgpr257, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s3 offen
BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, implicit $exec
@@ -183,44 +183,44 @@ body: |
; VGPRs above 512
- ; GCN-NEXT: s_set_vgpr_msb 0xaa
+ ; GCN-NEXT: s_set_vgpr_msb 0x41aa
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xab
+ ; GCN-NEXT: s_set_vgpr_msb 0xaaab
; ASM-SAME: ; msbs: dst=2 src0=3 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v0 /*v768*/, v2 /*v514*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr768, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xae
+ ; GCN-NEXT: s_set_vgpr_msb 0xabae
; ASM-SAME: ; msbs: dst=2 src0=2 src1=3 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v770*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr770, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xba
+ ; GCN-NEXT: s_set_vgpr_msb 0xaeba
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=3
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v771*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xea
+ ; GCN-NEXT: s_set_vgpr_msb 0xbaea
; ASM-SAME: ; msbs: dst=3 src0=2 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v255 /*v1023*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/
$vgpr1023 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xff
+ ; GCN-NEXT: s_set_vgpr_msb 0xeaff
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=3
; GCN-NEXT: v_fma_f32 v0 /*v768*/, v1 /*v769*/, v2 /*v770*/, v3 /*v771*/
$vgpr768 = V_FMA_F32_e64 0, undef $vgpr769, 0, undef $vgpr770, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x42
+ ; GCN-NEXT: s_set_vgpr_msb 0xff42
; ASM-SAME: ; msbs: dst=1 src0=2 src1=0 src2=0
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 /*v512*/
$vgpr256 = V_MOV_B32_e32 undef $vgpr512, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4200
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_fma_f32 v0, v1, v2, v3
$vgpr0 = V_FMA_F32_e64 0, undef $vgpr1, 0, undef $vgpr2, 0, undef $vgpr3, 0, 0, implicit $exec, implicit $mode
@@ -232,12 +232,12 @@ body: |
; GCN-NEXT: global_store_b96 v[0:1] /*v[512:513]*/, v[0:2] /*v[512:514]*/, off
GLOBAL_STORE_DWORDX3 $vgpr512_vgpr513, $vgpr512_vgpr513_vgpr514, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 11
+ ; GCN-NEXT: s_set_vgpr_msb 0xa0b
; ASM-SAME: ; msbs: dst=0 src0=3 src1=2 src2=0
; GCN-NEXT: global_store_b64 v[254:255] /*v[1022:1023]*/, v[254:255] /*v[766:767]*/, off
GLOBAL_STORE_DWORDX2 $vgpr1022_vgpr1023, $vgpr766_vgpr767, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x55
+ ; GCN-NEXT: s_set_vgpr_msb 0xb55
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1
; GCN-NEXT: v_wmma_f32_16x16x32_bf16 v[14:21] /*v[270:277]*/, v[26:33] /*v[282:289]*/, v[34:41] /*v[290:297]*/, v[14:21] /*v[270:277]*/
early-clobber $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, undef $vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287_vgpr288_vgpr289, 8, undef $vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297, 8, killed undef $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277, 0, 0, 0, 0, implicit $exec
@@ -247,6 +247,7 @@ body: |
...
# ASM-LABEL: {{^}}vopd:
+
# DIS-LABEL: <vopd>:
---
name: vopd
@@ -262,35 +263,35 @@ body: |
; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, v1, v2 :: v_dual_mul_f32 v0 /*v256*/, v3, v4
$vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr2, undef $vgpr3, undef $vgpr4, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x41
+ ; GCN-NEXT: s_set_vgpr_msb 0x4041
; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, s1, v2 :: v_dual_mul_f32 v0 /*v256*/, v44 /*v300*/, v4
$vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $sgpr1, undef $vgpr2, undef $vgpr300, undef $vgpr4, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 4
+ ; GCN-NEXT: s_set_vgpr_msb 0x4104
; GCN-NEXT: v_dual_sub_f32 v255, v1, v44 /*v300*/ :: v_dual_mul_f32 v6, v0, v1 /*v257*/
$vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr300, undef $vgpr0, $vgpr257, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x401
; GCN-NEXT: v_dual_sub_f32 v255, 0, v1 :: v_dual_mul_f32 v6, v44 /*v300*/, v3
$vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 0, undef $vgpr1, undef $vgpr300, undef $vgpr3, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 64
+ ; GCN-NEXT: s_set_vgpr_msb 0x140
; GCN-NEXT: v_dual_fmamk_f32 v243 /*v499*/, v0, 0xa, v3 :: v_dual_fmac_f32 v0 /*v256*/, v1, v1
$vgpr499, $vgpr256 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr3, undef $vgpr1, undef $vgpr1, $vgpr256, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 5
+ ; GCN-NEXT: s_set_vgpr_msb 0x4005
; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/
$vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
$vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 16
+ ; GCN-NEXT: s_set_vgpr_msb 0x4410
; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/
$vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x1000
; GCN-NEXT: v_dual_fmac_f32 v2, v6, v6 :: v_dual_fma_f32 v3, v4, v5, v3
$vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, undef $vgpr2, 0, undef $vgpr4, 0, undef $vgpr5, 0, $vgpr3, implicit $mode, implicit $exec
@@ -298,7 +299,7 @@ body: |
; GCN-NEXT: v_dual_fma_f32 v244 /*v500*/, v6, v7, v8 :: v_dual_add_f32 v3 /*v259*/, v4, v5
$vgpr500, $vgpr259 = V_DUAL_FMA_F32_e64_X_ADD_F32_e32_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr7, 0, undef $vgpr8, 0, undef $vgpr4, 0, undef $vgpr5, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0xae
+ ; GCN-NEXT: s_set_vgpr_msb 0x40ae
; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/
$vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef $vgpr775, 0, $vgpr515, implicit $mode, implicit $exec
@@ -319,31 +320,31 @@ body: |
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2 /*v258*/, 0x1
$vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 5
+ ; GCN-NEXT: s_set_vgpr_msb 0x4505
; GCN-NEXT: v_fmaak_f32 v0, v1 /*v257*/, v2 /*v258*/, 0x1
$vgpr0 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x41
+ ; GCN-NEXT: s_set_vgpr_msb 0x541
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2, 0x1
$vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr2, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x4144
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1
$vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x45
+ ; GCN-NEXT: s_set_vgpr_msb 0x4445
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 5
+ ; GCN-NEXT: s_set_vgpr_msb 0x4505
; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/
$vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x41
+ ; GCN-NEXT: s_set_vgpr_msb 0x541
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr2, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x4144
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/
$vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, implicit $mode
@@ -389,15 +390,15 @@ body: |
; GCN-NEXT: v_lshlrev_b32_e64 v0, v0 /*v256*/, v2
$vgpr0 = V_LSHLREV_B32_e64 undef $vgpr256, undef $vgpr2, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 4
+ ; GCN-NEXT: s_set_vgpr_msb 0x104
; GCN-NEXT: v_lshlrev_b32_e64 v0, v1, v0 /*v256*/
$vgpr0 = V_LSHLREV_B32_e64 undef $vgpr1, undef $vgpr256, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x401
; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v256*/, v2
$vgpr0 = V_SUBREV_U32_e32 undef $vgpr256, undef $vgpr2, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 4
+ ; GCN-NEXT: s_set_vgpr_msb 0x104
; GCN-NEXT: v_subrev_nc_u32_e32 v0, v1, v0 /*v256*/
$vgpr0 = V_SUBREV_U32_e32 undef $vgpr1, undef $vgpr256, implicit $exec
@@ -417,7 +418,7 @@ body: |
; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/
$vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x5500
; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v2
$vgpr0 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec
@@ -431,7 +432,7 @@ body: |
; GCN-NEXT: v_add_nc_u32_e32 v0 /*v256*/, v1, v2
$vgpr256 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: v_fma_f32 v3, v4, v5, s2
$vgpr3 = V_FMA_F32_e64 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $sgpr2, 0, 0, implicit $exec, implicit $mode
@@ -439,17 +440,17 @@ body: |
; GCN-NEXT: v_fma_f32 v3, v4 /*v260*/, v5, 1
$vgpr3 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr5, 0, 1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 4
+ ; GCN-NEXT: s_set_vgpr_msb 0x104
; GCN-NEXT: v_mov_b32_e32 v0, v1
$vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
; GCN-NEXT: v_add_nc_u32_e32 v2, v1, v3 /*v259*/
$vgpr2 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr259, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x401
; GCN-NEXT: v_mov_b32_e32 v0, v0 /*v256*/
; GCN-NEXT: v_add_nc_u32_e32 v1, v1 /*v257*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 5
+ ; GCN-NEXT: s_set_vgpr_msb 0x105
; GCN-NEXT: v_add_nc_u32_e32 v2, v2 /*v258*/, v2 /*v258*/
$vgpr0 = V_MOV_B32_e32 undef $vgpr256, implicit $exec
$vgpr1 = V_ADD_U32_e32 undef $vgpr257, undef $vgpr1, implicit $exec
@@ -478,16 +479,18 @@ body: |
; ASM: .LBB{{.*_1}}:
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
$vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
- ; No mode switch on fall through
+ ; Reset on fallthrough block end
bb.2:
; ASM-NEXT: %bb.2:
- ; GCN-NEXT: s_nop 0
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 64
+ ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: s_branch
- S_NOP 0
+ $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
S_BRANCH %bb.3
; Reset mode on terminator
@@ -496,7 +499,7 @@ body: |
; ASM: .LBB{{.*_3}}:
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: s_swap_pc_i64
$vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
$exec = S_SWAPPC_B64 undef $sgpr0_sgpr1
@@ -518,7 +521,7 @@ body: |
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: s_set_pc_i64
$vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
$vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
@@ -538,7 +541,7 @@ body: |
; ASM-NEXT: %bb.7:
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; ASM-NEXT: ; return to shader part epilog
$vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
SI_RETURN_TO_EPILOG undef $vgpr0, implicit-def $exec
@@ -556,7 +559,7 @@ body: |
; ASM-NEXT: %bb.9:
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: s_set_pc_i64
$vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
S_SETPC_B64_return undef $sgpr0_sgpr1, implicit-def $exec
@@ -574,13 +577,14 @@ body: |
; ASM: %bb.0:
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
$vgpr256 = V_MOV_B32_e32 undef $vgpr0, implicit $exec
bb.1:
; ASM: .LBB{{[0-9]+}}_1:
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: s_cbranch_scc0
$vgpr257 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
S_CBRANCH_SCC0 %bb.1, undef implicit $scc
@@ -604,7 +608,7 @@ body: |
; ASM: %bb.0:
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; ASM: def v0
; GCN-NOT: s_set_vgpr_msb
; ASM: use v0
@@ -638,7 +642,7 @@ body: |
; GCN-NEXT: s_set_vgpr_msb 64
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
; GCN-NEXT: s_nop 0
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4001
; GCN-NEXT: v_mov_b32_e32 v1, v0 /*v256*/
BUNDLE implicit-def $vgpr256 {
$vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
@@ -680,7 +684,7 @@ body: |
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v3, v1
BUNDLE implicit-def $vgpr256, implicit-def $vgpr257, implicit-def $vgpr2, implicit-def $vgpr3, implicit undef $vgpr1 {
@@ -709,7 +713,7 @@ body: |
; GCN-NEXT: s_clause 0x3e
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: v_mov_b32_e32 v1, v1
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-COUNT-60: v_mov_b32_e32 v1, v1
@@ -823,7 +827,7 @@ body: |
; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v1, v2
$vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x500
; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[100:115], v[100:115], v[10:17], v1, v2
$vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
@@ -835,11 +839,11 @@ body: |
; GCN-NEXT: v_wmma_ld_scale16_paired_b64 v[0:1], v[2:3]
V_WMMA_LD_SCALE16_PAIRED_B64 undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 5
+ ; GCN-NEXT: s_set_vgpr_msb 0x105
; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v[0:1], v[2:3]
$vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x500
; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[100:115], v[100:115], v[10:17], v[0:1], v[2:3]
$vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index db49339..9c16b3c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -22,8 +22,6 @@
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000
; OFFREG is offset system SGPR
-; GCN: buffer_store_dword {{v[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
; GCN: NumVgprs: 256
; GCN: ScratchSize: 640
diff --git a/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir b/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir
index 1b8e126..a1381ec 100644
--- a/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir
@@ -945,7 +945,6 @@ body: |
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
...
-# FIXME: Missing S_WAIT_XCNT before overwriting vgpr0.
---
name: wait_kmcnt_with_outstanding_vmem_2
tracksRegLiveness: true
@@ -971,6 +970,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_WAIT_KMCNT 0
; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
+ ; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
@@ -986,6 +986,180 @@ body: |
...
---
+name: wait_kmcnt_and_wait_loadcnt
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GCN-LABEL: name: wait_kmcnt_and_wait_loadcnt
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: liveins: $sgpr2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_WAIT_KMCNT 0
+ ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
+ ; GCN-NEXT: S_WAIT_LOADCNT 0
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ bb.0:
+ liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ bb.1:
+ liveins: $vgpr0_vgpr1, $sgpr2
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ bb.2:
+ liveins: $sgpr2
+ $sgpr2 = S_MOV_B32 $sgpr2
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+...
+
+---
+name: implicit_handling_of_pending_vmem_group
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GCN-LABEL: name: implicit_handling_of_pending_vmem_group
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_WAIT_KMCNT 0
+ ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_WAIT_XCNT 0
+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
+ bb.0:
+ liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ bb.1:
+ liveins: $vgpr0_vgpr1, $sgpr2
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ bb.2:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ $sgpr2 = S_MOV_B32 $sgpr2
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $sgpr0
+...
+
+---
+name: pending_vmem_event_between_block
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GCN-LABEL: name: pending_vmem_event_between_block
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_WAIT_KMCNT 0
+ ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
+ ; GCN-NEXT: S_WAIT_XCNT 1
+ ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_WAIT_XCNT 0
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
+ bb.0:
+ liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ bb.1:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
+ $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
+ bb.2:
+ liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
+ $sgpr2 = S_MOV_B32 $sgpr2
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $sgpr0
+...
+
+---
+name: flushing_vmem_cnt_on_block_entry
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GCN-LABEL: name: flushing_vmem_cnt_on_block_entry
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_WAIT_XCNT 0
+ ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
+ bb.0:
+ liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ bb.1:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
+ $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
+ bb.2:
+ liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $sgpr0
+...
+
+---
name: wait_loadcnt_with_outstanding_smem
tracksRegLiveness: true
machineFunctionInfo:
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index a42c8ac7..7581710 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -3182,7 +3182,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s33 offset:1592
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s33 offset:1596
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s33 offset:1600
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s33 offset:1604
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s33 offset:1608
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s33 offset:1612
@@ -3443,7 +3443,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s33 offset:2616
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s33 offset:2620
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s33 offset:2624
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s33 offset:2628
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s33 offset:2632
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s33 offset:2636
@@ -3706,7 +3706,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s33 offset:3648
; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s0, 3
@@ -4135,7 +4135,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v509*/, off, s33 offset:1592
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v510*/, off, s33 offset:1596
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v511*/, off, s33 offset:1600
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v512*/, off, s33 offset:1604
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v513*/, off, s33 offset:1608
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v514*/, off, s33 offset:1612
@@ -4396,7 +4396,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v765*/, off, s33 offset:2616
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v766*/, off, s33 offset:2620
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v767*/, off, s33 offset:2624
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v768*/, off, s33 offset:2628
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v769*/, off, s33 offset:2632
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v770*/, off, s33 offset:2636
@@ -4661,7 +4661,7 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s4
; GFX1250-DAGISEL-NEXT: s_mov_b32 s33, s0
; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
%ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
ret <2 x half> %ret
@@ -6346,7 +6346,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s32 offset:1588
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s32 offset:1592
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s32 offset:1596
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s32 offset:1600
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s32 offset:1604
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s32 offset:1608
@@ -6607,7 +6607,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s32 offset:2612
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s32 offset:2616
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s32 offset:2620
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s32 offset:2624
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s32 offset:2628
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s32 offset:2632
@@ -6872,7 +6872,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; GFX1250-DAGISEL-NEXT: v_mov_b32_e32 v2, v0
; GFX1250-DAGISEL-NEXT: s_mov_b64 s[36:37], gfx_callee@abs64
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: v_swap_b32 v0, v1
; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
; GFX1250-DAGISEL-NEXT: s_clause 0x3e
@@ -7283,7 +7283,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v509*/, off, s32 offset:1588
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v510*/, off, s32 offset:1592
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v511*/, off, s32 offset:1596
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v512*/, off, s32 offset:1600
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v513*/, off, s32 offset:1604
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v514*/, off, s32 offset:1608
@@ -7544,7 +7544,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v765*/, off, s32 offset:2612
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v766*/, off, s32 offset:2616
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v767*/, off, s32 offset:2620
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v768*/, off, s32 offset:2624
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v769*/, off, s32 offset:2628
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v770*/, off, s32 offset:2632
@@ -7807,7 +7807,7 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v1023*/, off, s32 offset:3644
; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s0
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[36:37]
%ret = tail call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
ret <2 x half> %ret
@@ -9657,7 +9657,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float>
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s33 offset:1600
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s33 offset:1604
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s33 offset:1608
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s33 offset:1612
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s33 offset:1616
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s33 offset:1620
@@ -9918,7 +9918,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float>
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s33 offset:2624
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s33 offset:2628
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s33 offset:2632
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s33 offset:2636
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s33 offset:2640
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s33 offset:2644
@@ -10181,7 +10181,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float>
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s33 offset:3656
; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: s_clause 0x2
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42, s33
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164
@@ -10616,7 +10616,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float>
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v509*/, off, s33 offset:1600
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v510*/, off, s33 offset:1604
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v511*/, off, s33 offset:1608
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v512*/, off, s33 offset:1612
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v513*/, off, s33 offset:1616
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v514*/, off, s33 offset:1620
@@ -10877,7 +10877,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float>
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v253 /*v765*/, off, s33 offset:2624
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v254 /*v766*/, off, s33 offset:2628
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v255 /*v767*/, off, s33 offset:2632
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0 /*v768*/, off, s33 offset:2636
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1 /*v769*/, off, s33 offset:2640
; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2 /*v770*/, off, s33 offset:2644
@@ -11142,7 +11142,7 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float>
; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s4
; GFX1250-DAGISEL-NEXT: s_mov_b32 s33, s0
; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
%ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x float> %x) convergent
store float %ret, ptr %p
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index ad8dcd3..21f0c00 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -3477,13 +3477,10 @@ define amdgpu_gs void @wqm_init_exec_wwm() {
; GFX9-W64-NEXT: s_mov_b64 exec, 0
; GFX9-W64-NEXT: s_mov_b32 s1, 0
; GFX9-W64-NEXT: s_mov_b32 s0, s1
-; GFX9-W64-NEXT: s_cmp_lg_u64 exec, 0
-; GFX9-W64-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX9-W64-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX9-W64-NEXT: s_cmp_eq_u64 s[0:1], 0
; GFX9-W64-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX9-W64-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
-; GFX9-W64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
-; GFX9-W64-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-W64-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-W64-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s[0:1]
; GFX9-W64-NEXT: exp mrt0 off, off, off, off
; GFX9-W64-NEXT: s_endpgm
;
@@ -3491,14 +3488,11 @@ define amdgpu_gs void @wqm_init_exec_wwm() {
; GFX10-W32: ; %bb.0:
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-W32-NEXT: s_mov_b32 s1, 0
-; GFX10-W32-NEXT: s_cmp_lg_u64 exec, 0
+; GFX10-W32-NEXT: v_mov_b32_e32 v0, 0
; GFX10-W32-NEXT: s_mov_b32 s0, s1
-; GFX10-W32-NEXT: s_cselect_b32 s2, -1, 0
-; GFX10-W32-NEXT: s_cmp_lg_u64 s[0:1], 0
-; GFX10-W32-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-W32-NEXT: s_cmp_eq_u64 s[0:1], 0
; GFX10-W32-NEXT: s_cselect_b32 s0, -1, 0
-; GFX10-W32-NEXT: s_xor_b32 s0, s2, s0
-; GFX10-W32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
+; GFX10-W32-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s0
; GFX10-W32-NEXT: exp mrt0 off, off, off, off
; GFX10-W32-NEXT: s_endpgm
call void @llvm.amdgcn.init.exec(i64 0)
diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll
index 93b6a58..cb87508 100644
--- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll
@@ -76,7 +76,6 @@ define i32 @fptosi_f32(float %x) #0 {
; CHECK-NOSP: bl __aeabi_f2iz
; CHECK-NOSP: bl __aeabi_f2iz
; CHECK-SP: vcvt.s32.f32
-; FIXME-CHECK-SP: vcvt.s32.f32
define void @fptosi_f32_twice(float %arg, ptr %ptr) #0 {
entry:
%conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %arg, metadata !"fpexcept.strict") #0
@@ -146,6 +145,80 @@ define float @tan_f32(float %x) #0 {
ret float %val
}
+; CHECK-LABEL: acos_f32:
+; CHECK: bl acosf
+define float @acos_f32(float %x, float %y) #0 {
+ %val = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: asin_f32:
+; CHECK: bl asinf
+define float @asin_f32(float %x, float %y) #0 {
+ %val = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: atan_f32:
+; CHECK: bl atanf
+define float @atan_f32(float %x, float %y) #0 {
+ %val = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: cosh_f32:
+; CHECK: bl coshf
+define float @cosh_f32(float %x, float %y) #0 {
+ %val = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: sinh_f32:
+; CHECK: bl sinhf
+define float @sinh_f32(float %x, float %y) #0 {
+ %val = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: tanh_f32:
+; CHECK: bl tanhf
+define float @tanh_f32(float %x, float %y) #0 {
+ %val = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: fmuladd_f32:
+; CHECK-SP: vfma.f32
+; CHECK-NOSP: bl __aeabi_fmul
+; CHECK-NOSP: bl __aeabi_fadd
+define float @fmuladd_f32(float %x, float %y, float %z) #0 {
+ %val = call float @llvm.experimental.constrained.fmuladd.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: ldexp_f32:
+; CHECK: bl ldexpf
+define float @ldexp_f32(float %x, i32 %y) #0 {
+ %val = call float @llvm.experimental.constrained.ldexp.f32.i32(float %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: roundeven_f32:
+; CHECK-SP-V8: vrintn.f32
+; CHECK-NOSP: bl roundevenf
+define float @roundeven_f32(float %x) #0 {
+ %val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+; CHECK-LABEL: uitofp_f32_i32:
+; CHECK-NOSP: bl __aeabi_ui2f
+; FIXME-CHECK-SP: vcvt.f32.f64
+define float @uitofp_f32_i32(i32 %x) #0 {
+ %val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
; CHECK-LABEL: atan2_f32:
; CHECK: bl atan2f
define float @atan2_f32(float %x, float %y) #0 {
@@ -617,6 +690,80 @@ define double @tan_f64(double %x) #0 {
ret double %val
}
+; CHECK-LABEL: acos_f64:
+; CHECK: bl acos
+define double @acos_f64(double %x, double %y) #0 {
+ %val = call double @llvm.experimental.constrained.acos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: asin_f64:
+; CHECK: bl asin
+define double @asin_f64(double %x, double %y) #0 {
+ %val = call double @llvm.experimental.constrained.asin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: atan_f64:
+; CHECK: bl atan
+define double @atan_f64(double %x, double %y) #0 {
+ %val = call double @llvm.experimental.constrained.atan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: cosh_f64:
+; CHECK: bl cosh
+define double @cosh_f64(double %x, double %y) #0 {
+ %val = call double @llvm.experimental.constrained.cosh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: sinh_f64:
+; CHECK: bl sinh
+define double @sinh_f64(double %x, double %y) #0 {
+ %val = call double @llvm.experimental.constrained.sinh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: tanh_f64:
+; CHECK: bl tanh
+define double @tanh_f64(double %x, double %y) #0 {
+ %val = call double @llvm.experimental.constrained.tanh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: fmuladd_f64:
+; CHECK-DP: vfma.f64
+; CHECK-NODP: bl __aeabi_dmul
+; CHECK-NODP: bl __aeabi_dadd
+define double @fmuladd_f64(double %x, double %y, double %z) #0 {
+ %val = call double @llvm.experimental.constrained.fmuladd.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: ldexp_f64:
+; CHECK: bl ldexp
+define double @ldexp_f64(double %x, i32 %y) #0 {
+ %val = call double @llvm.experimental.constrained.ldexp.f64.i32(double %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: roundeven_f64:
+; CHECK-DP-V8: vrintn.f64
+; CHECK-NODP: bl roundeven
+define double @roundeven_f64(double %x) #0 {
+ %val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
+; CHECK-LABEL: uitofp_f64_i32:
+; CHECK-NOSP: bl __aeabi_ui2d
+; FIXME-CHECK-SP: vsub.f64
+define double @uitofp_f64_i32(i32 %x) #0 {
+ %val = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
; CHECK-LABEL: atan2_f64:
; CHECK: bl atan2
define double @atan2_f64(double %x, double %y) #0 {
@@ -1052,6 +1199,16 @@ declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, meta
declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata)
+declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata)
@@ -1087,6 +1244,16 @@ declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, me
declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata)
+declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata)
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index 200b14b..b4060d5 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -98,12 +98,18 @@ define i32 @test_fptosi_i32(ptr %p) {
ret i32 %r
}
-; FIXME
-;define i64 @test_fptosi_i64(ptr %p) {
-; %a = load half, ptr %p, align 2
-; %r = fptosi half %a to i64
-; ret i64 %r
-;}
+define i64 @test_fptosi_i64(ptr %p) {
+; CHECK-LABEL: test_fptosi_i64:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: bl __fixhfdi
+; CHECK-NEXT: pop {r11, pc}
+ %a = load half, ptr %p, align 2
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
define i32 @test_fptoui_i32(ptr %p) {
; CHECK-LABEL: test_fptoui_i32:
@@ -116,12 +122,18 @@ define i32 @test_fptoui_i32(ptr %p) {
ret i32 %r
}
-; FIXME
-;define i64 @test_fptoui_i64(ptr %p) {
-; %a = load half, ptr %p, align 2
-; %r = fptoui half %a to i64
-; ret i64 %r
-;}
+define i64 @test_fptoui_i64(ptr %p) {
+; CHECK-LABEL: test_fptoui_i64:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: bl __fixunshfdi
+; CHECK-NEXT: pop {r11, pc}
+ %a = load half, ptr %p, align 2
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
define void @test_sitofp_i32(i32 %a, ptr %p) {
; CHECK-LABEL: test_sitofp_i32:
@@ -145,19 +157,31 @@ define void @test_uitofp_i32(i32 %a, ptr %p) {
ret void
}
-; FIXME
-;define void @test_sitofp_i64(i64 %a, ptr %p) {
-; %r = sitofp i64 %a to half
-; store half %r, ptr %p
-; ret void
-;}
+define void @test_sitofp_i64(i64 %a, ptr %p) {
+; CHECK-LABEL: test_sitofp_i64:
+; CHECK: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: mov r4, r2
+; CHECK-NEXT: bl __floatdihf
+; CHECK-NEXT: vstr.16 s0, [r4]
+; CHECK-NEXT: pop {r4, pc}
+ %r = sitofp i64 %a to half
+ store half %r, ptr %p
+ ret void
+}
-; FIXME
-;define void @test_uitofp_i64(i64 %a, ptr %p) {
-; %r = uitofp i64 %a to half
-; store half %r, ptr %p
-; ret void
-;}
+define void @test_uitofp_i64(i64 %a, ptr %p) {
+; CHECK-LABEL: test_uitofp_i64:
+; CHECK: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: mov r4, r2
+; CHECK-NEXT: bl __floatundihf
+; CHECK-NEXT: vstr.16 s0, [r4]
+; CHECK-NEXT: pop {r4, pc}
+ %r = uitofp i64 %a to half
+ store half %r, ptr %p
+ ret void
+}
define void @test_fptrunc_float(float %f, ptr %p) {
; CHECK-LABEL: test_fptrunc_float:
@@ -613,6 +637,902 @@ define void @test_fmuladd(ptr %p, ptr %q, ptr %r) {
ret void
}
+; Half-precision intrinsics
+
+define half @add_f16(half %x, half %y) #0 {
+; CHECK-LABEL: add_f16:
+; CHECK: vadd.f16 s0, s0, s1
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sub_f16(half %x, half %y) #0 {
+; CHECK-LABEL: sub_f16:
+; CHECK: vsub.f16 s0, s0, s1
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @mul_f16(half %x, half %y) #0 {
+; CHECK-LABEL: mul_f16:
+; CHECK: vmul.f16 s0, s0, s1
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @div_f16(half %x, half %y) #0 {
+; CHECK-LABEL: div_f16:
+; CHECK: vdiv.f16 s0, s0, s1
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.fdiv.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @frem_f16(half %x, half %y) #0 {
+; CHECK-LABEL: frem_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-NEXT: bl fmodf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.frem.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @fma_f16(half %x, half %y, half %z) #0 {
+; CHECK-LABEL: fma_f16:
+; CHECK: vfma.f16 s2, s0, s1
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @fmuladd_f16(half %x, half %y, half %z) #0 {
+; CHECK-LABEL: fmuladd_f16:
+; CHECK: vfma.f16 s2, s0, s1
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.fmuladd.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define i32 @fptosi_i32_f16(half %x) #0 {
+; CHECK-LABEL: fptosi_i32_f16:
+; CHECK: vcvt.s32.f16 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+ %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i32 %val
+}
+
+define i32 @fptoui_i32_f16(half %x) #0 {
+; CHECK-LABEL: fptoui_i32_f16:
+; CHECK: vcvt.s32.f16 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+ %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i32 %val
+}
+
+define i64 @fptosi_i64_f16(half %x) #0 {
+; CHECK-LABEL: fptosi_i64_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vmov.f16 r0, s0
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: bl __fixhfdi
+; CHECK-NEXT: pop {r11, pc}
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i64 %val
+}
+
+define i64 @fptoui_i64_f16(half %x) #0 {
+; CHECK-LABEL: fptoui_i64_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vmov.f16 r0, s0
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: bl __fixunshfdi
+; CHECK-NEXT: pop {r11, pc}
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i64 %val
+}
+
+define half @sitofp_f16_i32(i32 %x) #0 {
+; CHECK-LABEL: sitofp_f16_i32:
+; CHECK: .pad #8
+; CHECK-NEXT: sub sp, sp, #8
+; CHECK-NEXT: movw r1, #0
+; CHECK-NEXT: eor r0, r0, #-2147483648
+; CHECK-NEXT: movt r1, #17200
+; CHECK-NEXT: str r0, [sp]
+; CHECK-NEXT: str r1, [sp, #4]
+; CHECK-NEXT: vldr d16, .LCPI57_0
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vsub.f64 d16, d17, d16
+; CHECK-NEXT: vcvtb.f16.f64 s0, d16
+; CHECK-NEXT: add sp, sp, #8
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: .LCPI57_0:
+; CHECK-NEXT: .long 2147483648
+; CHECK-NEXT: .long 1127219200
+ %val = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @uitofp_f16_i32(i32 %x) #0 {
+; CHECK-LABEL: uitofp_f16_i32:
+; CHECK: .pad #8
+; CHECK-NEXT: sub sp, sp, #8
+; CHECK-NEXT: movw r1, #0
+; CHECK-NEXT: str r0, [sp]
+; CHECK-NEXT: movt r1, #17200
+; CHECK-NEXT: vldr d16, .LCPI58_0
+; CHECK-NEXT: str r1, [sp, #4]
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vsub.f64 d16, d17, d16
+; CHECK-NEXT: vcvtb.f16.f64 s0, d16
+; CHECK-NEXT: add sp, sp, #8
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: .LCPI58_0:
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1127219200
+ %val = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sitofp_f16_i64(i64 %x) #0 {
+; CHECK-LABEL: sitofp_f16_i64:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl __floatdihf
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @uitofp_f16_i64(i64 %x) #0 {
+; CHECK-LABEL: uitofp_f16_i64:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl __floatundihf
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sitofp_f16_i128(i128 %x) #0 {
+; CHECK-LABEL: sitofp_f16_i128:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl __floattihf
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.sitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @uitofp_f16_i128(i128 %x) #0 {
+; CHECK-LABEL: uitofp_f16_i128:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl __floatuntihf
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.uitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sqrt_f16(half %x) #0 {
+; CHECK-LABEL: sqrt_f16:
+; CHECK: vsqrt.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.sqrt.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @powi_f16(half %x, i32 %y) #0 {
+; CHECK-LABEL: powi_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl __powisf2
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.powi.f16(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sin_f16(half %x) #0 {
+; CHECK-LABEL: sin_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl sinf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.sin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @cos_f16(half %x) #0 {
+; CHECK-LABEL: cos_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl cosf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.cos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @tan_f16(half %x) #0 {
+; CHECK-LABEL: tan_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.tan.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @asin_f16(half %x) #0 {
+; CHECK-LABEL: asin_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl asinf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.asin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @acos_f16(half %x) #0 {
+; CHECK-LABEL: acos_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl acosf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.acos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @atan_f16(half %x) #0 {
+; CHECK-LABEL: atan_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl atanf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.atan.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @atan2_f16(half %x, half %y) #0 {
+; CHECK-LABEL: atan2_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-NEXT: bl atan2f
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.atan2.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sinh_f16(half %x) #0 {
+; CHECK-LABEL: sinh_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl sinhf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.sinh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @cosh_f16(half %x) #0 {
+; CHECK-LABEL: cosh_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl coshf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.cosh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @tanh_f16(half %x) #0 {
+; CHECK-LABEL: tanh_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl tanhf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.tanh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @pow_f16(half %x, half %y) #0 {
+; CHECK-LABEL: pow_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-NEXT: bl powf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.pow.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @log_f16(half %x) #0 {
+; CHECK-LABEL: log_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl logf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.log.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @log10_f16(half %x) #0 {
+; CHECK-LABEL: log10_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl log10f
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.log10.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @log2_f16(half %x) #0 {
+; CHECK-LABEL: log2_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl log2f
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.log2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @exp_f16(half %x) #0 {
+; CHECK-LABEL: exp_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl expf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.exp.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @exp2_f16(half %x) #0 {
+; CHECK-LABEL: exp2_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl exp2f
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.exp2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @rint_f16(half %x) #0 {
+; CHECK-LABEL: rint_f16:
+; CHECK: vrintx.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.rint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @nearbyint_f16(half %x) #0 {
+; CHECK-LABEL: nearbyint_f16:
+; CHECK: vrintr.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.nearbyint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define i32 @lrint_f16(half %x) #0 {
+; CHECK-LABEL: lrint_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl lrintf
+; CHECK-NEXT: pop {r11, pc}
+ %val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret i32 %val
+}
+
+define i64 @llrint_f16(half %x) #0 {
+; CHECK-LABEL: llrint_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: pop {r11, pc}
+ %val = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret i64 %val
+}
+
+define half @maxnum_f16(half %x, half %y) #0 {
+; CHECK-LABEL: maxnum_f16:
+; CHECK: vmaxnm.f16 s0, s0, s1
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.maxnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @minnum_f16(half %x, half %y) #0 {
+; CHECK-LABEL: minnum_f16:
+; CHECK: vminnm.f16 s0, s0, s1
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.minnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @ceil_f16(half %x) #0 {
+; CHECK-LABEL: ceil_f16:
+; CHECK: vrintp.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.ceil.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @floor_f16(half %x) #0 {
+; CHECK-LABEL: floor_f16:
+; CHECK: vrintm.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.floor.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define i32 @lround_f16(half %x) #0 {
+; CHECK-LABEL: lround_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl lroundf
+; CHECK-NEXT: pop {r11, pc}
+ %val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i32 %val
+}
+
+define i64 @llround_f16(half %x) #0 {
+; CHECK-LABEL: llround_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl llroundf
+; CHECK-NEXT: pop {r11, pc}
+ %val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i64 %val
+}
+
+define half @round_f16(half %x) #0 {
+; CHECK-LABEL: round_f16:
+; CHECK: vrinta.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.round.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @roundeven_f16(half %x) #0 {
+; CHECK-LABEL: roundeven_f16:
+; CHECK: vrintn.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.roundeven.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @trunc_f16(half %x) #0 {
+; CHECK-LABEL: trunc_f16:
+; CHECK: vrintz.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.trunc.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @ldexp_f16(half %x, i32 %y) #0 {
+; CHECK-LABEL: ldexp_f16:
+; CHECK: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: pop {r11, pc}
+ %val = call half @llvm.experimental.constrained.ldexp.f16.i32(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define i32 @fcmp_olt_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_olt_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwmi r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ole_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_ole_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwls r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ogt_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_ogt_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_oge_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_oge_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwge r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_oeq_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_oeq_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_one_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_one_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwmi r0, #1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ult_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_ult_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ule_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_ule_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwle r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ugt_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_ugt_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_uge_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_uge_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwpl r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ueq_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_ueq_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: movwvs r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_une_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmp_une_f16:
+; CHECK: vcmp.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwne r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_olt_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_olt_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwmi r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ole_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_ole_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwls r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ogt_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_ogt_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_oge_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_oge_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwge r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_oeq_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_oeq_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_one_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_one_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwmi r0, #1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ult_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_ult_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ule_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_ule_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwle r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ugt_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_ugt_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_uge_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_uge_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwpl r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ueq_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_ueq_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: movwvs r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_une_f16(half %a, half %b) #0 {
+; CHECK-LABEL: fcmps_une_f16:
+; CHECK: vcmpe.f16 s0, s1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movwne r0, #1
+; CHECK-NEXT: bx lr
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
+; Intrinsics to convert between floating-point types
+
+define half @fptrunc_f16_f32(float %x) #0 {
+; CHECK-LABEL: fptrunc_f16_f32:
+; CHECK: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define float @fpext_f32_f16(half %x) #0 {
+; CHECK-LABEL: fpext_f32_f16:
+; CHECK: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bx lr
+ %val = call float @llvm.experimental.constrained.fpext.f32.f16(half %x, metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+
+attributes #0 = { strictfp }
+
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.frem.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fmuladd.f16(half, half, half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i128(i128, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i128(i128, metadata, metadata)
+declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.powi.f16(half, i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sin.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.cos.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.tan.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.pow.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.log.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.log10.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.log2.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f16(half, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.i64.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.ceil.f16(half, metadata)
+declare half @llvm.experimental.constrained.floor.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.llround.i64.f16(half, metadata)
+declare half @llvm.experimental.constrained.round.f16(half, metadata)
+declare half @llvm.experimental.constrained.roundeven.f16(half, metadata)
+declare half @llvm.experimental.constrained.trunc.f16(half, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
+
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
+
+
declare half @llvm.sqrt.f16(half %a)
declare half @llvm.powi.f16.i32(half %a, i32 %b)
declare half @llvm.sin.f16(half %a)
diff --git a/llvm/test/CodeGen/ARM/strict-fp-func.ll b/llvm/test/CodeGen/ARM/strict-fp-func.ll
new file mode 100644
index 0000000..39bb2b4
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strict-fp-func.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple arm-none-eabi -stop-after=finalize-isel %s -o - | FileCheck %s
+
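+; Check that the call from a strictfp function carries an implicit-def of
+; $fpscr_rm, i.e. the callee is modelled as possibly changing the dynamic
+; rounding mode.
+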
+define float @func_02(float %x, float %y) strictfp nounwind {
+ %call = call float @func_01(float %x) strictfp
+ %res = call float @llvm.experimental.constrained.fadd.f32(float %call, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") strictfp
+ ret float %res
+}
+; CHECK-LABEL: name: func_02
+; CHECK: BL @func_01, {{.*}}, implicit-def $fpscr_rm
+
+
+declare float @func_01(float)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
diff --git a/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll b/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll
new file mode 100644
index 0000000..6e5b589
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple armv7-- -mattr=+vfp4 -O0 -o - %s | FileCheck %s
+; RUN: llc -mtriple armv7-- -mattr=+vfp4 -O3 -o - %s | FileCheck %s --check-prefix=CHECK-O3
+
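+; Check that the promoted i16 operand of a constrained sitofp/uitofp keeps the
+; correct signedness: sxth for the signed conversion in @test, uxth for the
+; unsigned conversion in @test2.
+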
+declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
+
+define i32 @test(i32 %a, i16 %b) #0 {
+; CHECK-LABEL: test:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: sxth r0, r1
+; CHECK-NEXT: movw r1, #0
+; CHECK-NEXT: movt r1, #17200
+; CHECK-NEXT: str r1, [sp, #4]
+; CHECK-NEXT: eor r2, r2, #-2147483648
+; CHECK-NEXT: str r2, [sp]
+; CHECK-NEXT: vldr d16, [sp]
+; CHECK-NEXT: vldr d17, .LCPI0_0
+; CHECK-NEXT: vsub.f64 d16, d16, d17
+; CHECK-NEXT: vcvt.f32.f64 s0, d16
+; CHECK-NEXT: str r1, [sp, #12]
+; CHECK-NEXT: eor r0, r0, #-2147483648
+; CHECK-NEXT: str r0, [sp, #8]
+; CHECK-NEXT: vldr d16, [sp, #8]
+; CHECK-NEXT: vsub.f64 d16, d16, d17
+; CHECK-NEXT: vcvt.f32.f64 s2, d16
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long 2147483648 @ double 4503601774854144
+; CHECK-NEXT: .long 1127219200
+;
+; CHECK-O3-LABEL: test:
+; CHECK-O3: @ %bb.0: @ %entry
+; CHECK-O3-NEXT: sub sp, sp, #16
+; CHECK-O3-NEXT: sxth r1, r1
+; CHECK-O3-NEXT: movw r2, #0
+; CHECK-O3-NEXT: movt r2, #17200
+; CHECK-O3-NEXT: str r2, [sp, #4]
+; CHECK-O3-NEXT: eor r0, r0, #-2147483648
+; CHECK-O3-NEXT: str r0, [sp]
+; CHECK-O3-NEXT: vldr d16, [sp]
+; CHECK-O3-NEXT: vldr d17, .LCPI0_0
+; CHECK-O3-NEXT: vsub.f64 d16, d16, d17
+; CHECK-O3-NEXT: vcvt.f32.f64 s0, d16
+; CHECK-O3-NEXT: str r2, [sp, #12]
+; CHECK-O3-NEXT: eor r0, r1, #-2147483648
+; CHECK-O3-NEXT: str r0, [sp, #8]
+; CHECK-O3-NEXT: vldr d16, [sp, #8]
+; CHECK-O3-NEXT: vsub.f64 d16, d16, d17
+; CHECK-O3-NEXT: vcvt.f32.f64 s2, d16
+; CHECK-O3-NEXT: vcmp.f32 s0, s2
+; CHECK-O3-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-O3-NEXT: mov r0, #0
+; CHECK-O3-NEXT: movweq r0, #1
+; CHECK-O3-NEXT: add sp, sp, #16
+; CHECK-O3-NEXT: bx lr
+; CHECK-O3-NEXT: .p2align 3
+; CHECK-O3-NEXT: @ %bb.1:
+; CHECK-O3-NEXT: .LCPI0_0:
+; CHECK-O3-NEXT: .long 2147483648 @ double 4503601774854144
+; CHECK-O3-NEXT: .long 1127219200
+entry:
+ %conv = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ %conv1 = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %conv, float %conv1, metadata !"oeq", metadata !"fpexcept.strict") #1
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+define i32 @test2(i32 %a, i16 %b) #0 {
+; CHECK-LABEL: test2:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: uxth r0, r1
+; CHECK-NEXT: movw r1, #0
+; CHECK-NEXT: movt r1, #17200
+; CHECK-NEXT: str r1, [sp, #4]
+; CHECK-NEXT: eor r2, r2, #-2147483648
+; CHECK-NEXT: str r2, [sp]
+; CHECK-NEXT: vldr d16, [sp]
+; CHECK-NEXT: vldr d17, .LCPI1_0
+; CHECK-NEXT: vsub.f64 d16, d16, d17
+; CHECK-NEXT: vcvt.f32.f64 s0, d16
+; CHECK-NEXT: str r1, [sp, #12]
+; CHECK-NEXT: str r0, [sp, #8]
+; CHECK-NEXT: vldr d16, [sp, #8]
+; CHECK-NEXT: vldr d17, .LCPI1_1
+; CHECK-NEXT: vsub.f64 d16, d16, d17
+; CHECK-NEXT: vcvt.f32.f64 s2, d16
+; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI1_0:
+; CHECK-NEXT: .long 2147483648 @ double 4503601774854144
+; CHECK-NEXT: .long 1127219200
+; CHECK-NEXT: .LCPI1_1:
+; CHECK-NEXT: .long 0 @ double 4503599627370496
+; CHECK-NEXT: .long 1127219200
+;
+; CHECK-O3-LABEL: test2:
+; CHECK-O3: @ %bb.0: @ %entry
+; CHECK-O3-NEXT: sub sp, sp, #16
+; CHECK-O3-NEXT: uxth r1, r1
+; CHECK-O3-NEXT: movw r2, #0
+; CHECK-O3-NEXT: movt r2, #17200
+; CHECK-O3-NEXT: str r2, [sp, #4]
+; CHECK-O3-NEXT: eor r0, r0, #-2147483648
+; CHECK-O3-NEXT: str r0, [sp]
+; CHECK-O3-NEXT: vldr d16, [sp]
+; CHECK-O3-NEXT: vldr d17, .LCPI1_0
+; CHECK-O3-NEXT: vsub.f64 d16, d16, d17
+; CHECK-O3-NEXT: vcvt.f32.f64 s0, d16
+; CHECK-O3-NEXT: str r2, [sp, #12]
+; CHECK-O3-NEXT: str r1, [sp, #8]
+; CHECK-O3-NEXT: vldr d16, [sp, #8]
+; CHECK-O3-NEXT: vldr d17, .LCPI1_1
+; CHECK-O3-NEXT: vsub.f64 d16, d16, d17
+; CHECK-O3-NEXT: vcvt.f32.f64 s2, d16
+; CHECK-O3-NEXT: vcmp.f32 s0, s2
+; CHECK-O3-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-O3-NEXT: mov r0, #0
+; CHECK-O3-NEXT: movweq r0, #1
+; CHECK-O3-NEXT: add sp, sp, #16
+; CHECK-O3-NEXT: bx lr
+; CHECK-O3-NEXT: .p2align 3
+; CHECK-O3-NEXT: @ %bb.1:
+; CHECK-O3-NEXT: .LCPI1_0:
+; CHECK-O3-NEXT: .long 2147483648 @ double 4503601774854144
+; CHECK-O3-NEXT: .long 1127219200
+; CHECK-O3-NEXT: .LCPI1_1:
+; CHECK-O3-NEXT: .long 0 @ double 4503599627370496
+; CHECK-O3-NEXT: .long 1127219200
+entry:
+ %conv = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ %conv1 = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %conv, float %conv1, metadata !"oeq", metadata !"fpexcept.strict") #1
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+attributes #0 = { strictfp noinline optnone }
+attributes #1 = { strictfp }
diff --git a/llvm/test/CodeGen/ARM/strict-fp-ops.ll b/llvm/test/CodeGen/ARM/strict-fp-ops.ll
new file mode 100644
index 0000000..608ab07
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strict-fp-ops.ll
@@ -0,0 +1,202 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple armv7-- -mattr=+vfp4 %s -o - | FileCheck %s
+
+
+; Div whose result is unused should be removed unless we have strict exceptions
+
+define void @unused_div(float %x, float %y) {
+; CHECK-LABEL: unused_div:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %div = fdiv float %x, %y
+ ret void
+}
+
+define void @unused_div_fpexcept_strict(float %x, float %y) #0 {
+; CHECK-LABEL: unused_div_fpexcept_strict:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vdiv.f32 s0, s2, s0
+; CHECK-NEXT: bx lr
+entry:
+ %div = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret void
+}
+
+define void @unused_div_round_dynamic(float %x, float %y) #0 {
+; CHECK-LABEL: unused_div_round_dynamic:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %div = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ ret void
+}
+
+
+; Machine CSE should eliminate the second add unless we have strict exceptions
+
+define float @add_twice(float %x, float %y, i32 %n) {
+; CHECK-LABEL: add_twice:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vadd.f32 s0, s2, s0
+; CHECK-NEXT: vmul.f32 s2, s0, s0
+; CHECK-NEXT: vmoveq.f32 s2, s0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: bx lr
+entry:
+ %add = fadd float %x, %y
+ %tobool.not = icmp eq i32 %n, 0
+ br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:
+ %add1 = fadd float %x, %y
+ %mul = fmul float %add, %add1
+ br label %if.end
+
+if.end:
+ %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ]
+ ret float %a.0
+}
+
+define float @add_twice_fpexcept_strict(float %x, float %y, i32 %n) #0 {
+; CHECK-LABEL: add_twice_fpexcept_strict:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vmov s4, r0
+; CHECK-NEXT: vadd.f32 s0, s4, s2
+; CHECK-NEXT: vaddne.f32 s2, s4, s2
+; CHECK-NEXT: vmulne.f32 s0, s0, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ %tobool.not = icmp eq i32 %n, 0
+ br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:
+ %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ %mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ br label %if.end
+
+if.end:
+ %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ]
+ ret float %a.0
+}
+
+define float @add_twice_round_dynamic(float %x, float %y, i32 %n) #0 {
+; CHECK-LABEL: add_twice_round_dynamic:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vadd.f32 s0, s2, s0
+; CHECK-NEXT: vmulne.f32 s0, s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ %tobool.not = icmp eq i32 %n, 0
+ br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:
+ %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ %mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ br label %if.end
+
+if.end:
+ %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ]
+ ret float %a.0
+}
+
+; Two adds separated by llvm.set.rounding should be preserved when rounding is
+; dynamic (as they may give different results) or when we have strict exceptions
+; (the llvm.set.rounding is irrelevant, but both could trap).
+
+define float @set_rounding(float %x, float %y) {
+; CHECK-LABEL: set_rounding:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmrs r2, fpscr
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vadd.f32 s0, s2, s0
+; CHECK-NEXT: vsub.f32 s0, s0, s0
+; CHECK-NEXT: orr r0, r2, #12582912
+; CHECK-NEXT: vmsr fpscr, r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmrs r1, fpscr
+; CHECK-NEXT: bic r1, r1, #12582912
+; CHECK-NEXT: vmsr fpscr, r1
+; CHECK-NEXT: bx lr
+entry:
+ %add1 = fadd float %x, %y
+ call void @llvm.set.rounding(i32 0)
+ %add2 = fadd float %x, %y
+ call void @llvm.set.rounding(i32 1)
+ %sub = fsub float %add1, %add2
+ ret float %sub
+}
+
+define float @set_rounding_fpexcept_strict(float %x, float %y) #0 {
+; CHECK-LABEL: set_rounding_fpexcept_strict:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vadd.f32 s4, s2, s0
+; CHECK-NEXT: vmrs r0, fpscr
+; CHECK-NEXT: orr r0, r0, #12582912
+; CHECK-NEXT: vmsr fpscr, r0
+; CHECK-NEXT: vadd.f32 s0, s2, s0
+; CHECK-NEXT: vmrs r0, fpscr
+; CHECK-NEXT: bic r0, r0, #12582912
+; CHECK-NEXT: vmsr fpscr, r0
+; CHECK-NEXT: vsub.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ call void @llvm.set.rounding(i32 0) #0
+ %add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ call void @llvm.set.rounding(i32 1) #0
+ %sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %sub
+}
+
+define float @set_rounding_round_dynamic(float %x, float %y) #0 {
+; CHECK-LABEL: set_rounding_round_dynamic:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vmrs r0, fpscr
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vadd.f32 s4, s2, s0
+; CHECK-NEXT: orr r0, r0, #12582912
+; CHECK-NEXT: vmsr fpscr, r0
+; CHECK-NEXT: vmrs r0, fpscr
+; CHECK-NEXT: vadd.f32 s0, s2, s0
+; CHECK-NEXT: bic r0, r0, #12582912
+; CHECK-NEXT: vmsr fpscr, r0
+; CHECK-NEXT: vsub.f32 s0, s4, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ call void @llvm.set.rounding(i32 0) #0
+ %add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ call void @llvm.set.rounding(i32 1) #0
+ %sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+ ret float %sub
+}
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+declare i32 @llvm.get.rounding()
+declare void @llvm.set.rounding(i32)
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll
new file mode 100644
index 0000000..5906c79
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7-- < %s | FileCheck -check-prefix=NOFP16 %s
+
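+; Without fp16 support, half arguments and returns are promoted to float and
+; lowered through the __gnu_h2f_ieee/__gnu_f2h_ieee libcalls; check that this
+; also holds for the constrained fpext/fptrunc intrinsics under strictfp.
+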
+declare void @f16_user(half)
+declare half @f16_result()
+
+declare void @v2f16_user(<2 x half>)
+declare <2 x half> @v2f16_result()
+
+declare void @v4f16_user(<4 x half>)
+declare <4 x half> @v4f16_result()
+
+declare void @v8f16_user(<8 x half>)
+declare <8 x half> @v8f16_result()
+
+define void @f16_arg(half %arg, ptr %ptr) #0 {
+; NOFP16-LABEL: f16_arg:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r4, lr}
+; NOFP16-NEXT: uxth r0, r0
+; NOFP16-NEXT: mov r4, r1
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: str r0, [r4]
+; NOFP16-NEXT: pop {r4, pc}
+ %fpext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
+ store float %fpext, ptr %ptr
+ ret void
+}
+
+define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
+; NOFP16-LABEL: v2f16_arg:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r4, r5, r11, lr}
+; NOFP16-NEXT: vpush {d8}
+; NOFP16-NEXT: mov r5, r0
+; NOFP16-NEXT: uxth r0, r1
+; NOFP16-NEXT: mov r4, r2
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: uxth r1, r5
+; NOFP16-NEXT: vmov s17, r0
+; NOFP16-NEXT: mov r0, r1
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: vmov s16, r0
+; NOFP16-NEXT: vstr d8, [r4]
+; NOFP16-NEXT: vpop {d8}
+; NOFP16-NEXT: pop {r4, r5, r11, pc}
+ %fpext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
+ store <2 x float> %fpext, ptr %ptr
+ ret void
+}
+
+define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
+; NOFP16-LABEL: v3f16_arg:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r4, r5, r6, lr}
+; NOFP16-NEXT: vpush {d8}
+; NOFP16-NEXT: mov r6, r0
+; NOFP16-NEXT: uxth r0, r1
+; NOFP16-NEXT: mov r4, r3
+; NOFP16-NEXT: mov r5, r2
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: uxth r1, r6
+; NOFP16-NEXT: vmov s17, r0
+; NOFP16-NEXT: mov r0, r1
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: vmov s16, r0
+; NOFP16-NEXT: uxth r0, r5
+; NOFP16-NEXT: vst1.32 {d8}, [r4:64]!
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: str r0, [r4]
+; NOFP16-NEXT: vpop {d8}
+; NOFP16-NEXT: pop {r4, r5, r6, pc}
+ %fpext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
+ store <3 x float> %fpext, ptr %ptr
+ ret void
+}
+
+define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
+; NOFP16-LABEL: v4f16_arg:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r4, r5, r6, r7, r11, lr}
+; NOFP16-NEXT: vpush {d8, d9}
+; NOFP16-NEXT: mov r6, r0
+; NOFP16-NEXT: uxth r0, r1
+; NOFP16-NEXT: mov r4, r3
+; NOFP16-NEXT: mov r5, r2
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: mov r7, r0
+; NOFP16-NEXT: uxth r0, r4
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: vmov s19, r0
+; NOFP16-NEXT: uxth r0, r5
+; NOFP16-NEXT: ldr r4, [sp, #40]
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: vmov s18, r0
+; NOFP16-NEXT: uxth r0, r6
+; NOFP16-NEXT: vmov s17, r7
+; NOFP16-NEXT: bl __gnu_h2f_ieee
+; NOFP16-NEXT: vmov s16, r0
+; NOFP16-NEXT: vst1.64 {d8, d9}, [r4]
+; NOFP16-NEXT: vpop {d8, d9}
+; NOFP16-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %fpext = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %arg, metadata !"fpexcept.strict")
+ store <4 x float> %fpext, ptr %ptr
+ ret void
+}
+
+define half @f16_return(float %arg) #0 {
+; NOFP16-LABEL: f16_return:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r11, lr}
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: pop {r11, pc}
+ %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret half %fptrunc
+}
+
+define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
+; NOFP16-LABEL: v2f16_return:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r11, lr}
+; NOFP16-NEXT: vpush {d8}
+; NOFP16-NEXT: sub sp, sp, #8
+; NOFP16-NEXT: vmov d8, r0, r1
+; NOFP16-NEXT: vmov r0, s17
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: vmov r1, s16
+; NOFP16-NEXT: strh r0, [sp, #6]
+; NOFP16-NEXT: mov r0, r1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: strh r0, [sp, #4]
+; NOFP16-NEXT: add r0, sp, #4
+; NOFP16-NEXT: vld1.32 {d16[0]}, [r0:32]
+; NOFP16-NEXT: vmovl.u16 q8, d16
+; NOFP16-NEXT: vmov.32 r0, d16[0]
+; NOFP16-NEXT: vmov.32 r1, d16[1]
+; NOFP16-NEXT: add sp, sp, #8
+; NOFP16-NEXT: vpop {d8}
+; NOFP16-NEXT: pop {r11, pc}
+ %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <2 x half> %fptrunc
+}
+
+define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
+; NOFP16-LABEL: v3f16_return:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r4, r5, r6, lr}
+; NOFP16-NEXT: vmov d1, r2, r3
+; NOFP16-NEXT: mov r5, r0
+; NOFP16-NEXT: vmov d0, r0, r1
+; NOFP16-NEXT: mov r4, r1
+; NOFP16-NEXT: vmov r0, s2
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: uxth r6, r0
+; NOFP16-NEXT: mov r0, r4
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov r4, r0
+; NOFP16-NEXT: mov r0, r5
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: pkhbt r0, r0, r4, lsl #16
+; NOFP16-NEXT: vmov d16, r0, r6
+; NOFP16-NEXT: vmov.u16 r0, d16[0]
+; NOFP16-NEXT: vmov.u16 r1, d16[1]
+; NOFP16-NEXT: vmov.u16 r2, d16[2]
+; NOFP16-NEXT: pop {r4, r5, r6, pc}
+ %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <3 x half> %fptrunc
+}
+
+define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
+; NOFP16-LABEL: v4f16_return:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r4, r5, r11, lr}
+; NOFP16-NEXT: vpush {d8, d9}
+; NOFP16-NEXT: vmov d8, r2, r3
+; NOFP16-NEXT: vmov d9, r0, r1
+; NOFP16-NEXT: vmov r2, s17
+; NOFP16-NEXT: mov r0, r2
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov r4, r0
+; NOFP16-NEXT: vmov r0, s16
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: vmov r1, s19
+; NOFP16-NEXT: pkhbt r5, r0, r4, lsl #16
+; NOFP16-NEXT: mov r0, r1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov r4, r0
+; NOFP16-NEXT: vmov r0, s18
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: pkhbt r0, r0, r4, lsl #16
+; NOFP16-NEXT: vmov d16, r0, r5
+; NOFP16-NEXT: vmov.u16 r0, d16[0]
+; NOFP16-NEXT: vmov.u16 r1, d16[1]
+; NOFP16-NEXT: vmov.u16 r2, d16[2]
+; NOFP16-NEXT: vmov.u16 r3, d16[3]
+; NOFP16-NEXT: vpop {d8, d9}
+; NOFP16-NEXT: pop {r4, r5, r11, pc}
+ %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <4 x half> %fptrunc
+}
+
+define void @outgoing_v4f16_return(ptr %ptr) #0 {
+; NOFP16-LABEL: outgoing_v4f16_return:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r4, lr}
+; NOFP16-NEXT: mov r4, r0
+; NOFP16-NEXT: bl v4f16_result
+; NOFP16-NEXT: strh r3, [r4, #6]
+; NOFP16-NEXT: strh r2, [r4, #4]
+; NOFP16-NEXT: strh r1, [r4, #2]
+; NOFP16-NEXT: strh r0, [r4]
+; NOFP16-NEXT: pop {r4, pc}
+ %val = call <4 x half> @v4f16_result() #0
+ store <4 x half> %val, ptr %ptr
+ ret void
+}
+
+define void @outgoing_v8f16_return(ptr %ptr) #0 {
+; NOFP16-LABEL: outgoing_v8f16_return:
+; NOFP16: @ %bb.0:
+; NOFP16-NEXT: push {r4, r10, r11, lr}
+; NOFP16-NEXT: add r11, sp, #8
+; NOFP16-NEXT: sub sp, sp, #16
+; NOFP16-NEXT: bfc sp, #0, #4
+; NOFP16-NEXT: mov r4, r0
+; NOFP16-NEXT: mov r0, sp
+; NOFP16-NEXT: bl v8f16_result
+; NOFP16-NEXT: ldm sp, {r0, r1, r2, r3}
+; NOFP16-NEXT: stm r4, {r0, r1, r2, r3}
+; NOFP16-NEXT: sub sp, r11, #8
+; NOFP16-NEXT: pop {r4, r10, r11, pc}
+ %val = call <8 x half> @v8f16_result() #0
+ store <8 x half> %val, ptr %ptr
+ ret void
+}
+
+define half @call_split_type_used_outside_block_v8f16() #0 {
+; NOFP16-LABEL: call_split_type_used_outside_block_v8f16:
+; NOFP16: @ %bb.0: @ %bb0
+; NOFP16-NEXT: push {r4, r10, r11, lr}
+; NOFP16-NEXT: add r11, sp, #8
+; NOFP16-NEXT: sub sp, sp, #16
+; NOFP16-NEXT: bfc sp, #0, #4
+; NOFP16-NEXT: mov r4, sp
+; NOFP16-NEXT: mov r0, r4
+; NOFP16-NEXT: bl v8f16_result
+; NOFP16-NEXT: vld1.32 {d16[0]}, [r4:32]
+; NOFP16-NEXT: vmov.u16 r0, d16[0]
+; NOFP16-NEXT: sub sp, r11, #8
+; NOFP16-NEXT: pop {r4, r10, r11, pc}
+bb0:
+ %split.ret.type = call <8 x half> @v8f16_result() #0
+ br label %bb1
+
+bb1:
+ %extract = extractelement <8 x half> %split.ret.type, i32 0
+ ret half %extract
+}
+
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
+declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
+declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0
+declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0
+
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
+declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/Hexagon/isel-fclass.ll b/llvm/test/CodeGen/Hexagon/isel-fclass.ll
new file mode 100644
index 0000000..96b0210
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel-fclass.ll
@@ -0,0 +1,86 @@
+; Tests lowering of sfclass/dfclass compares.
+; Without the added patterns, the sub-optimal code generated is:
+; {
+; p0 = sfclass(r0,#16)
+; r0 = sfadd(r0,r0)
+; }
+; {
+; r2 = p0
+; }
+; {
+; if (p0.new) r0 = ##1065353216
+; p0 = cmp.eq(r2,#0)
+; jumpr r31
+; }
+; With the patterns added, we should be generating
+; {
+; p0 = sfclass(r0,#16)
+; r0 = sfadd(r0,r0)
+; }
+; {
+; if (!p0) r0 = ##1065353216
+; jumpr r31
+; }
+
+; RUN: llc -march=hexagon -stop-after=hexagon-isel %s -o - | FileCheck %s
+
+; CHECK: bb.0.entry1
+; CHECK: F2_sfclass
+; CHECK-NOT: C2_cmp
+; CHECK: C2_not
+; CHECK: F2_sfadd
+define float @test1(float noundef %x) {
+entry1:
+ %0 = tail call i32 @llvm.hexagon.F2.sfclass(float %x, i32 16)
+ %tobool.not = icmp eq i32 %0, 0
+ %add = fadd float %x, %x
+ %spec.select = select i1 %tobool.not, float 1.000000e+00, float %add
+ ret float %spec.select
+}
+
+; CHECK: bb.0.entry2
+; CHECK: F2_sfclass
+; CHECK-NOT: C2_cmp
+; CHECK: F2_sfadd
+define float @test2(float noundef %x) {
+entry2:
+ %0 = tail call i32 @llvm.hexagon.F2.sfclass(float %x, i32 16)
+ %tobool.not = icmp eq i32 %0, 0
+ %add = fadd float %x, %x
+ %spec.select = select i1 %tobool.not, float %add, float 1.000000e+00
+ ret float %spec.select
+}
+
+; CHECK: bb.0.entry3
+; CHECK: F2_dfclass
+; CHECK-NOT: C2_cmp
+; CHECK: C2_not
+; CHECK: F2_dfadd
+define double @test3(double noundef %x) {
+entry3:
+ %0 = tail call i32 @llvm.hexagon.F2.dfclass(double %x, i32 16)
+ %tobool.not = icmp eq i32 %0, 0
+ %add = fadd double %x, %x
+ %spec.select = select i1 %tobool.not, double 1.000000e+00, double %add
+ ret double %spec.select
+}
+
+; CHECK: bb.0.entry4
+; CHECK: F2_dfclass
+; CHECK-NOT: C2_cmp
+; CHECK: F2_dfadd
+define double @test4(double noundef %x) {
+entry4:
+ %0 = tail call i32 @llvm.hexagon.F2.dfclass(double %x, i32 16)
+ %tobool.not = icmp eq i32 %0, 0
+ %add = fadd double %x, %x
+ %spec.select = select i1 %tobool.not, double %add, double 1.000000e+00
+ ret double %spec.select
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.hexagon.F2.dfclass(double, i32 immarg)
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.hexagon.F2.sfclass(float, i32 immarg)
diff --git a/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll
new file mode 100644
index 0000000..1491729
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll
@@ -0,0 +1,18 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
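+; Check that truncating <64 x i32> to <64 x i1> and storing it as a predicate
+; is legalized for 128-byte HVX via a sequence of vand/vpacke/vror/vor feeding
+; a predicate register.
+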
+define void @f5(<64 x i32> %a0, ptr %a1) {
+; CHECK-LABEL: f5:
+; CHECK: [[REG0:(r[0-9]+)]] = ##16843009
+; CHECK-DAG: q[[Q0:[0-9]+]] = vand(v{{[0-9]+}},[[REG0]])
+; CHECK-DAG: q[[Q1:[0-9]+]] = vand(v{{[0-9]+}},[[REG0]])
+; CHECK: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
+; CHECK: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
+; CHECK: v[[VROR:[0-9]+]] = vror(v{{[0-9]+}},r{{[0-9]+}})
+; CHECK: v[[VOR:[0-9]+]] = vor(v[[VROR]],v{{[0-9]+}})
+; CHECK: q{{[0-9]+}} = vand(v[[VOR]],r{{[0-9]+}})
+b0:
+ %v0 = trunc <64 x i32> %a0 to <64 x i1>
+ store <64 x i1> %v0, ptr %a1, align 1
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
index 93fcd42..e02a2e7 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
@@ -12,8 +12,8 @@ define float @flog2_s(float %x) nounwind {
;
; LA64-LABEL: flog2_s:
; LA64: # %bb.0:
-; LA64-NEXT: pcaddu18i $t8, %call36(log2f)
-; LA64-NEXT: jr $t8
+; LA64-NEXT: flogb.s $fa0, $fa0
+; LA64-NEXT: ret
%y = call float @llvm.log2.f32(float %x)
ret float %y
}
@@ -25,8 +25,8 @@ define double @flog2_d(double %x) nounwind {
;
; LA64-LABEL: flog2_d:
; LA64: # %bb.0:
-; LA64-NEXT: pcaddu18i $t8, %call36(log2)
-; LA64-NEXT: jr $t8
+; LA64-NEXT: flogb.d $fa0, $fa0
+; LA64-NEXT: ret
%y = call double @llvm.log2.f64(double %x)
ret double %y
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
index ba2118f..b3155c9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
@@ -106,6 +106,69 @@ define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind {
ret void
}
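+
+; ctlz of a bitwise-not of the input: the xor with -1 is materialized
+; (xvxori.b, or xvrepli.b -1 + xvxor.v) before the xvclz.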
+define void @not_ctlz_v32i8(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
+; CHECK-NEXT: xvclz.b $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <32 x i8>, ptr %src
+ %neg = xor <32 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %neg, i1 false)
+ store <32 x i8> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v16i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvclz.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <16 x i16>, ptr %src
+ %neg = xor <16 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %neg, i1 false)
+ store <16 x i16> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v8i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvclz.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <8 x i32>, ptr %src
+ %neg = xor <8 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %neg, i1 false)
+ store <8 x i32> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v4i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvclz.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <4 x i64>, ptr %src
+ %neg = xor <4 x i64> %v, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %neg, i1 false)
+ store <4 x i64> %res, ptr %dst
+ ret void
+}
+
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
new file mode 100644
index 0000000..fa5f27e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
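+;; Check that vector ceil/floor/trunc/roundeven are each selected to a single
+;; xvfrintrp/xvfrintrm/xvfrintrz/xvfrintrne instruction.
+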
+;; ceilf
+define void @ceil_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrintrp.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.ceil.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; ceil
+define void @ceil_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrintrp.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.ceil.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; floorf
+define void @floor_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrintrm.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.floor.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; floor
+define void @floor_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrintrm.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.floor.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; truncf
+define void @trunc_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrintrz.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.trunc.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; trunc
+define void @trunc_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrintrz.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.trunc.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; roundevenf
+define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrintrne.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; roundeven
+define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrintrne.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
+declare <8 x float> @llvm.floor.v8f32(<8 x float>)
+declare <4 x double> @llvm.floor.v4f64(<4 x double>)
+declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
new file mode 100644
index 0000000..5c5c199
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
@@ -0,0 +1,321 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
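+;; Check that (a + b) >> 1 with an arithmetic shift is selected to xvavg, with
+;; a logical shift to the unsigned xvavg.*u variants, and that (a + b + 1) >> 1
+;; is selected to xvavgr/xvavgr.*u. On LA32 the 64-bit element cases are
+;; expanded with xvadd.d and shifts instead.
+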
+define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %add = add <32 x i8> %va, %vb
+ %shr = ashr <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %add = add <16 x i16> %va, %vb
+ %shr = ashr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %add = add <8 x i32> %va, %vb
+ %shr = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %add = add <4 x i64> %va, %vb
+ %shr = ashr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %add = add <32 x i8> %va, %vb
+ %shr = lshr <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %add = add <16 x i16> %va, %vb
+ %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %add = add <8 x i32> %va, %vb
+ %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %add = add <4 x i64> %va, %vb
+ %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %add = add <32 x i8> %va, %vb
+ %add1 = add <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %shr = ashr <32 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %add = add <16 x i16> %va, %vb
+ %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = ashr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %add = add <8 x i32> %va, %vb
+ %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = ashr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavgr_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %add = add <4 x i64> %va, %vb
+ %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %shr = ashr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %add = add <32 x i8> %va, %vb
+ %add1 = add <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %shr = lshr <32 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %add = add <16 x i16> %va, %vb
+ %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %add = add <8 x i32> %va, %vb
+ %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavgr_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %add = add <4 x i64> %va, %vb
+ %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %shr, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll
new file mode 100644
index 0000000..c82adcb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll
@@ -0,0 +1,379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
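+;; Here the averages are computed in a widened type (sext/zext, add, lshr 1,
+;; trunc), which is lowered to the overflow-free form (a & b) + ((a ^ b) >> 1)
+;; using xvand.v, xvxor.v, a shift and an add.
+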
+define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = sext <32 x i8> %va to <32 x i16>
+ %eb = sext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = sext <16 x i16> %va to <16 x i32>
+ %eb = sext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = sext <8 x i32> %va to <8 x i64>
+ %eb = sext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = sext <4 x i64> %va to <4 x i128>
+ %eb = sext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = zext <32 x i8> %va to <32 x i16>
+ %eb = zext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = zext <16 x i16> %va to <16 x i32>
+ %eb = zext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = zext <8 x i32> %va to <8 x i64>
+ %eb = zext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = zext <4 x i64> %va to <4 x i128>
+ %eb = zext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = sext <32 x i8> %va to <32 x i16>
+ %eb = sext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = sext <16 x i16> %va to <16 x i32>
+ %eb = sext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = sext <8 x i32> %va to <8 x i64>
+ %eb = sext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = sext <4 x i64> %va to <4 x i128>
+ %eb = sext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = zext <32 x i8> %va to <32 x i16>
+ %eb = zext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = zext <16 x i16> %va to <16 x i32>
+ %eb = zext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = zext <8 x i32> %va to <8 x i64>
+ %eb = zext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = zext <4 x i64> %va to <4 x i128>
+ %eb = zext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
index 68f2e3a..6b5f575 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
@@ -1,166 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
 
declare <8 x float> @llvm.log2.v8f32(<8 x float>)
 declare <4 x double> @llvm.log2.v4f64(<4 x double>)
 
define void @flog2_v8f32(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v8f32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -128
-; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 5
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 4
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 6
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 32
-; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 7
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 48
-; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 1
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 0
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 2
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 32
-; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 3
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 48
-; LA32-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: xvst $xr1, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 128
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v8f32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -128
-; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 5
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 4
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 6
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 32
-; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 7
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 48
-; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 1
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 2
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 32
-; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 3
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 48
-; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: xvst $xr1, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 128
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvflogb.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <8 x float>, ptr %a
%r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v)
@@ -169,93 +20,12 @@ entry:
 }
 
define void @flog2_v4f64(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v4f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -112
-; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 3
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.d $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 1
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 0
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.d $vr0, $vr1, 16
-; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: xvst $xr0, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 112
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v4f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -112
-; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 3
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 2
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 1
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
-; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: xvst $xr0, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 112
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvflogb.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <4 x double>, ptr %a
%r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
index a9a38e8..6ac7d51 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
@@ -106,6 +106,69 @@ define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind {
ret void
 }
 
+define void @not_ctlz_v16i8(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
+; CHECK-NEXT: vclz.b $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <16 x i8>, ptr %src
+ %neg = xor <16 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %neg, i1 false)
+ store <16 x i8> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v8i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vclz.h $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <8 x i16>, ptr %src
+ %neg = xor <8 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %neg, i1 false)
+ store <8 x i16> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v4i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vclz.w $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <4 x i32>, ptr %src
+ %neg = xor <4 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %neg, i1 false)
+ store <4 x i32> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v2i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vclz.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <2 x i64>, ptr %src
+ %neg = xor <2 x i64> %v, <i64 -1, i64 -1>
+ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %neg, i1 false)
+ store <2 x i64> %res, ptr %dst
+ ret void
+}
+
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll
new file mode 100644
index 0000000..cb01ac0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+;; ceilf
+define void @ceil_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrintrp.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %r = call <4 x float> @llvm.ceil.v4f32(<4 x float> %v0)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+;; ceil
+define void @ceil_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrintrp.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %r = call <2 x double> @llvm.ceil.v2f64(<2 x double> %v0)
+ store <2 x double> %r, ptr %res
+ ret void
+}
+
+;; floorf
+define void @floor_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrintrm.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %r = call <4 x float> @llvm.floor.v4f32(<4 x float> %v0)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+;; floor
+define void @floor_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrintrm.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %r = call <2 x double> @llvm.floor.v2f64(<2 x double> %v0)
+ store <2 x double> %r, ptr %res
+ ret void
+}
+
+;; truncf
+define void @trunc_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrintrz.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %r = call <4 x float> @llvm.trunc.v4f32(<4 x float> %v0)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+;; trunc
+define void @trunc_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrintrz.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %r = call <2 x double> @llvm.trunc.v2f64(<2 x double> %v0)
+ store <2 x double> %r, ptr %res
+ ret void
+}
+
+;; roundevenf
+define void @roundeven_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrintrne.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %r = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %v0)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+;; roundeven
+define void @roundeven_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrintrne.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %r = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %v0)
+ store <2 x double> %r, ptr %res
+ ret void
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
new file mode 100644
index 0000000..334af22
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
@@ -0,0 +1,321 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %add = add <16 x i8> %va, %vb
+ %shr = ashr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <16 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %add = add <8 x i16> %va, %vb
+ %shr = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <8 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %add = add <4 x i32> %va, %vb
+ %shr = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: vavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vsrai.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavg.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %add = add <2 x i64> %va, %vb
+ %shr = ashr <2 x i64> %add, <i64 1, i64 1>
+ store <2 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %add = add <16 x i8> %va, %vb
+ %shr = lshr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <16 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %add = add <8 x i16> %va, %vb
+ %shr = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <8 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %add = add <4 x i32> %va, %vb
+ %shr = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: vavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavg_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavg.du $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %add = add <2 x i64> %va, %vb
+ %shr = lshr <2 x i64> %add, <i64 1, i64 1>
+ store <2 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %add = add <16 x i8> %va, %vb
+ %add1 = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %shr = ashr <16 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <16 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %add = add <8 x i16> %va, %vb
+ %add1 = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = ashr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <8 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %add = add <4 x i32> %va, %vb
+ %add1 = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+ %shr = ashr <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: vavgr_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vaddi.du $vr0, $vr0, 1
+; LA32-NEXT: vsrai.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavgr_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavgr.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %add = add <2 x i64> %va, %vb
+ %add1 = add <2 x i64> %add, <i64 1, i64 1>
+ %shr = ashr <2 x i64> %add1, <i64 1, i64 1>
+ store <2 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %add = add <16 x i8> %va, %vb
+ %add1 = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %shr = lshr <16 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <16 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %add = add <8 x i16> %va, %vb
+ %add1 = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <8 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %add = add <4 x i32> %va, %vb
+ %add1 = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: vavgr_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vaddi.du $vr0, $vr0, 1
+; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavgr_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavgr.du $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %add = add <2 x i64> %va, %vb
+ %add1 = add <2 x i64> %add, <i64 1, i64 1>
+ %shr = lshr <2 x i64> %add1, <i64 1, i64 1>
+ store <2 x i64> %shr, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll
new file mode 100644
index 0000000..bb4df64
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll
@@ -0,0 +1,379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vadd.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %ea = sext <16 x i8> %va to <16 x i16>
+ %eb = sext <16 x i8> %vb to <16 x i16>
+ %add = add <16 x i16> %ea, %eb
+ %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <16 x i16> %shr to <16 x i8>
+ store <16 x i8> %r, ptr %res
+ ret void
+}
+
+define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vadd.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %ea = sext <8 x i16> %va to <8 x i32>
+ %eb = sext <8 x i16> %vb to <8 x i32>
+ %add = add <8 x i32> %ea, %eb
+ %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <8 x i32> %shr to <8 x i16>
+ store <8 x i16> %r, ptr %res
+ ret void
+}
+
+define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vadd.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %ea = sext <4 x i32> %va to <4 x i64>
+ %eb = sext <4 x i32> %vb to <4 x i64>
+ %add = add <4 x i64> %ea, %eb
+ %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <4 x i64> %shr to <4 x i32>
+ store <4 x i32> %r, ptr %res
+ ret void
+}
+
+define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
+; CHECK-NEXT: vadd.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %ea = sext <2 x i64> %va to <2 x i128>
+ %eb = sext <2 x i64> %vb to <2 x i128>
+ %add = add <2 x i128> %ea, %eb
+ %shr = lshr <2 x i128> %add, <i128 1, i128 1>
+ %r = trunc <2 x i128> %shr to <2 x i64>
+ store <2 x i64> %r, ptr %res
+ ret void
+}
+
+define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vadd.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %ea = zext <16 x i8> %va to <16 x i16>
+ %eb = zext <16 x i8> %vb to <16 x i16>
+ %add = add <16 x i16> %ea, %eb
+ %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <16 x i16> %shr to <16 x i8>
+ store <16 x i8> %r, ptr %res
+ ret void
+}
+
+define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vadd.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %ea = zext <8 x i16> %va to <8 x i32>
+ %eb = zext <8 x i16> %vb to <8 x i32>
+ %add = add <8 x i32> %ea, %eb
+ %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <8 x i32> %shr to <8 x i16>
+ store <8 x i16> %r, ptr %res
+ ret void
+}
+
+define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vadd.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %ea = zext <4 x i32> %va to <4 x i64>
+ %eb = zext <4 x i32> %vb to <4 x i64>
+ %add = add <4 x i64> %ea, %eb
+ %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <4 x i64> %shr to <4 x i32>
+ store <4 x i32> %r, ptr %res
+ ret void
+}
+
+define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
+; CHECK-NEXT: vadd.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %ea = zext <2 x i64> %va to <2 x i128>
+ %eb = zext <2 x i64> %vb to <2 x i128>
+ %add = add <2 x i128> %ea, %eb
+ %shr = lshr <2 x i128> %add, <i128 1, i128 1>
+ %r = trunc <2 x i128> %shr to <2 x i64>
+ store <2 x i64> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vsub.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %ea = sext <16 x i8> %va to <16 x i16>
+ %eb = sext <16 x i8> %vb to <16 x i16>
+ %add = add <16 x i16> %ea, %eb
+ %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <16 x i16> %shr to <16 x i8>
+ store <16 x i8> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vsub.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %ea = sext <8 x i16> %va to <8 x i32>
+ %eb = sext <8 x i16> %vb to <8 x i32>
+ %add = add <8 x i32> %ea, %eb
+ %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <8 x i32> %shr to <8 x i16>
+ store <8 x i16> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vsub.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %ea = sext <4 x i32> %va to <4 x i64>
+ %eb = sext <4 x i32> %vb to <4 x i64>
+ %add = add <4 x i64> %ea, %eb
+ %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <4 x i64> %shr to <4 x i32>
+ store <4 x i32> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
+; CHECK-NEXT: vsub.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %ea = sext <2 x i64> %va to <2 x i128>
+ %eb = sext <2 x i64> %vb to <2 x i128>
+ %add = add <2 x i128> %ea, %eb
+ %add1 = add <2 x i128> %add, <i128 1, i128 1>
+ %shr = lshr <2 x i128> %add1, <i128 1, i128 1>
+ %r = trunc <2 x i128> %shr to <2 x i64>
+ store <2 x i64> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vsub.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %ea = zext <16 x i8> %va to <16 x i16>
+ %eb = zext <16 x i8> %vb to <16 x i16>
+ %add = add <16 x i16> %ea, %eb
+ %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <16 x i16> %shr to <16 x i8>
+ store <16 x i8> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vsub.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %ea = zext <8 x i16> %va to <8 x i32>
+ %eb = zext <8 x i16> %vb to <8 x i32>
+ %add = add <8 x i32> %ea, %eb
+ %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <8 x i32> %shr to <8 x i16>
+ store <8 x i16> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vsub.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %ea = zext <4 x i32> %va to <4 x i64>
+ %eb = zext <4 x i32> %vb to <4 x i64>
+ %add = add <4 x i64> %ea, %eb
+ %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <4 x i64> %shr to <4 x i32>
+ store <4 x i32> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
+; CHECK-NEXT: vsub.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %ea = zext <2 x i64> %va to <2 x i128>
+ %eb = zext <2 x i64> %vb to <2 x i128>
+ %add = add <2 x i128> %ea, %eb
+ %add1 = add <2 x i128> %add, <i128 1, i128 1>
+ %shr = lshr <2 x i128> %add1, <i128 1, i128 1>
+ %r = trunc <2 x i128> %shr to <2 x i64>
+ store <2 x i64> %r, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
index e5e75ec..87cc7c6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
@@ -1,98 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
 
declare <4 x float> @llvm.log2.v4f32(<4 x float>)
 declare <2 x double> @llvm.log2.v2f64(<2 x double>)
 
define void @flog2_v4f32(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v4f32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -48
-; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vreplvei.w $vr0, $vr0, 0
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr0, $vr1, 16
-; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vreplvei.w $vr0, $vr0, 2
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 32
-; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
-; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vreplvei.w $vr0, $vr0, 3
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 48
-; LA32-NEXT: vst $vr1, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 48
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v4f32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -48
-; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
-; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 2
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 32
-; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 3
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 48
-; LA64-NEXT: vst $vr1, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 48
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vflogb.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <4 x float>, ptr %a
%r = call <4 x float> @llvm.log2.v4f32(<4 x float> %v)
@@ -101,59 +20,12 @@ entry:
}
define void @flog2_v2f64(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v2f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -48
-; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: vreplvei.d $vr0, $vr0, 1
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
-; LA32-NEXT: vreplvei.d $vr0, $vr0, 0
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.d $vr0, $vr1, 16
-; LA32-NEXT: vst $vr0, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 48
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v2f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -48
-; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 1
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
-; LA64-NEXT: vst $vr0, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 48
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vflogb.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <2 x double>, ptr %a
%r = call <2 x double> @llvm.log2.v2f64(<2 x double> %v)
diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
new file mode 100644
index 0000000..9a806a1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
@@ -0,0 +1,758 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefix=LA32 %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefix=LA64 %s
+
+%struct.S = type { i64, i64, i8 }
+%struct.F = type { float, double, float }
+%struct.V = type { <4 x i32>, <4 x i32>, <16 x i16> }
+
+define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a1, $a0, 4
+; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
+; LA32-NEXT: add.w $a0, $a4, $a0
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: addi.w $s2, $a0, 8
+; LA32-NEXT: bnez $a1, .LBB0_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: move $s5, $zero
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s6, $zero
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB0_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: ld.w $a0, $s2, 4
+; LA32-NEXT: ld.w $a1, $s2, 0
+; LA32-NEXT: add.w $a0, $a0, $s6
+; LA32-NEXT: add.w $s3, $a1, $s3
+; LA32-NEXT: sltu $a1, $s3, $a1
+; LA32-NEXT: addi.w $s4, $s4, 1
+; LA32-NEXT: sltui $a2, $s4, 1
+; LA32-NEXT: add.w $s5, $s5, $a2
+; LA32-NEXT: xor $a2, $s4, $s1
+; LA32-NEXT: xor $a3, $s5, $s0
+; LA32-NEXT: or $a2, $a2, $a3
+; LA32-NEXT: add.w $s6, $a0, $a1
+; LA32-NEXT: bnez $a2, .LBB0_2
+; LA32-NEXT: b .LBB0_4
+; LA32-NEXT: .LBB0_3:
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s6, $zero
+; LA32-NEXT: .LBB0_4: # %for.cond.cleanup
+; LA32-NEXT: st.w $s3, $s2, 0
+; LA32-NEXT: st.w $s6, $s2, 4
+; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 4
+; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: addi.d $s1, $a0, 8
+; LA64-NEXT: blez $s0, .LBB0_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: move $s2, $zero
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB0_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $a0, $s1, 0
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: add.d $s2, $a0, $s2
+; LA64-NEXT: bnez $s0, .LBB0_2
+; LA64-NEXT: b .LBB0_4
+; LA64-NEXT: .LBB0_3:
+; LA64-NEXT: move $s2, $zero
+; LA64-NEXT: .LBB0_4: # %for.cond.cleanup
+; LA64-NEXT: st.d $s2, $s1, 0
+; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 1
+ %cmp4 = icmp sgt i64 %n, 0
+ br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %s.05 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ call void @f(ptr %a)
+ %0 = load i64, ptr %y
+ %add = add nsw i64 %0, %s.05
+ %inc = add nuw nsw i64 %i.06, 1
+ %exitcond.not = icmp eq i64 %inc, %n
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %s.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ store i64 %s.0.lcssa, ptr %y
+ ret void
+}
+
+define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a1, $a0, 4
+; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
+; LA32-NEXT: add.w $a0, $a4, $a0
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: bnez $a1, .LBB1_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: movgr2fr.w $fs0, $zero
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB1_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: fld.s $fa0, $s2, 0
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: fadd.s $fs0, $fa0, $fs0
+; LA32-NEXT: bnez $a0, .LBB1_2
+; LA32-NEXT: b .LBB1_4
+; LA32-NEXT: .LBB1_3:
+; LA32-NEXT: movgr2fr.w $fs0, $zero
+; LA32-NEXT: .LBB1_4: # %for.cond.cleanup
+; LA32-NEXT: fst.s $fs0, $s2, 0
+; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 4
+; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: blez $s0, .LBB1_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: movgr2fr.w $fs0, $zero
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB1_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fld.s $fa0, $s1, 0
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: fadd.s $fs0, $fa0, $fs0
+; LA64-NEXT: bnez $s0, .LBB1_2
+; LA64-NEXT: b .LBB1_4
+; LA64-NEXT: .LBB1_3:
+; LA64-NEXT: movgr2fr.w $fs0, $zero
+; LA64-NEXT: .LBB1_4: # %for.cond.cleanup
+; LA64-NEXT: fst.s $fs0, $s1, 0
+; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.F, ptr %a, i64 %k, i32 2
+ %cmp4 = icmp sgt i64 %n, 0
+ br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %s.05 = phi float [ 0.0, %entry ], [ %add, %for.body ]
+ call void @f(ptr %a)
+ %0 = load float, ptr %y
+ %add = fadd float %0, %s.05
+ %inc = add nuw nsw i64 %i.06, 1
+ %exitcond.not = icmp eq i64 %inc, %n
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %s.0.lcssa = phi float [ 0.0, %entry ], [ %add, %for.body ]
+ store float %s.0.lcssa, ptr %y
+ ret void
+}
+
+define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_v4i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a0, $a0, 6
+; LA32-NEXT: add.w $a0, $a4, $a0
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: bnez $a1, .LBB2_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB2_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: vld $vr0, $s2, 0
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: vadd.w $vr1, $vr0, $vr1
+; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: bnez $a0, .LBB2_2
+; LA32-NEXT: b .LBB2_4
+; LA32-NEXT: .LBB2_3:
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: .LBB2_4: # %for.cond.cleanup
+; LA32-NEXT: vst $vr0, $s2, 0
+; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_v4i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: slli.d $a0, $a0, 6
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: blez $a1, .LBB2_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: vrepli.b $vr0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB2_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: vld $vr0, $s1, 0
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vadd.w $vr1, $vr0, $vr1
+; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: bnez $s0, .LBB2_2
+; LA64-NEXT: b .LBB2_4
+; LA64-NEXT: .LBB2_3:
+; LA64-NEXT: vrepli.b $vr0, 0
+; LA64-NEXT: .LBB2_4: # %for.cond.cleanup
+; LA64-NEXT: vst $vr0, $s1, 0
+; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.V, ptr %a, i64 %k, i32 1
+ %cmp = icmp sgt i64 %n, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi <4 x i32> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ call void @f(ptr %a)
+ %v = load <4 x i32>, ptr %y
+ %addv = add <4 x i32> %v, %sum.0
+ %inc = add nuw nsw i64 %i.0, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.lcssa = phi <4 x i32> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ store <4 x i32> %sum.lcssa, ptr %y
+ ret void
+}
+
+define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_v16i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -80
+; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 68 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 64 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 52 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a0, $a0, 6
+; LA32-NEXT: add.w $a0, $a4, $a0
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: addi.w $s2, $a0, 32
+; LA32-NEXT: bnez $a1, .LBB3_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: xvrepli.b $xr0, 0
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB3_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: xvld $xr0, $s2, 0
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: xvadd.h $xr1, $xr0, $xr1
+; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: bnez $a0, .LBB3_2
+; LA32-NEXT: b .LBB3_4
+; LA32-NEXT: .LBB3_3:
+; LA32-NEXT: xvrepli.b $xr0, 0
+; LA32-NEXT: .LBB3_4: # %for.cond.cleanup
+; LA32-NEXT: xvst $xr0, $s2, 0
+; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 64 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 68 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 72 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 80
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_v16i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -80
+; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64-NEXT: slli.d $a0, $a0, 6
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: addi.d $s1, $a0, 32
+; LA64-NEXT: blez $a1, .LBB3_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: xvrepli.b $xr0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB3_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: xvld $xr0, $s1, 0
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1
+; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: bnez $s0, .LBB3_2
+; LA64-NEXT: b .LBB3_4
+; LA64-NEXT: .LBB3_3:
+; LA64-NEXT: xvrepli.b $xr0, 0
+; LA64-NEXT: .LBB3_4: # %for.cond.cleanup
+; LA64-NEXT: xvst $xr0, $s1, 0
+; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 80
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.V, ptr %a, i64 %k, i32 2
+ %cmp = icmp sgt i64 %n, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi <16 x i16> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ call void @f(ptr %a)
+ %v = load <16 x i16>, ptr %y
+ %addv = add <16 x i16> %v, %sum.0
+ %inc = add nuw nsw i64 %i.0, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.lcssa = phi <16 x i16> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ store <16 x i16> %sum.lcssa, ptr %y
+ ret void
+}
+
+define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_extracti8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a1, $a0, 4
+; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
+; LA32-NEXT: add.w $a0, $a4, $a0
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: bnez $a1, .LBB4_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB4_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: vldrepl.b $vr0, $s2, 0
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: vadd.b $vr1, $vr0, $vr1
+; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: bnez $a0, .LBB4_2
+; LA32-NEXT: b .LBB4_4
+; LA32-NEXT: .LBB4_3:
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: .LBB4_4: # %for.cond.cleanup
+; LA32-NEXT: vstelm.b $vr0, $s2, 0, 1
+; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_extracti8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 4
+; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: blez $s0, .LBB4_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: vrepli.b $vr0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB4_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: vldrepl.b $vr0, $s1, 0
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vadd.b $vr1, $vr0, $vr1
+; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: bnez $s0, .LBB4_2
+; LA64-NEXT: b .LBB4_4
+; LA64-NEXT: .LBB4_3:
+; LA64-NEXT: vrepli.b $vr0, 0
+; LA64-NEXT: .LBB4_4: # %for.cond.cleanup
+; LA64-NEXT: vstelm.b $vr0, $s1, 0, 1
+; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 2
+ %cmp = icmp sgt i64 %n, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi <16 x i8> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ call void @f(ptr %a)
+ %e = load i8, ptr %y
+ %ins0 = insertelement <16 x i8> poison, i8 %e, i32 0
+ %v = shufflevector <16 x i8> %ins0, <16 x i8> poison, <16 x i32> zeroinitializer
+ %addv = add <16 x i8> %v, %sum.0
+ %inc = add nuw nsw i64 %i.0, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.lcssa = phi <16 x i8> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ %res = extractelement <16 x i8> %sum.lcssa, i32 1
+ store i8 %res, ptr %y
+ ret void
+}
+
+define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_extractf64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -80
+; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 68 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 64 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 52 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a1, $a0, 4
+; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
+; LA32-NEXT: add.w $a0, $a4, $a0
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: addi.w $s2, $a0, 8
+; LA32-NEXT: bnez $a1, .LBB5_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: xvrepli.b $xr0, 0
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB5_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: xvldrepl.d $xr0, $s2, 0
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: xvfadd.d $xr1, $xr0, $xr1
+; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: bnez $a0, .LBB5_2
+; LA32-NEXT: b .LBB5_4
+; LA32-NEXT: .LBB5_3:
+; LA32-NEXT: xvrepli.b $xr0, 0
+; LA32-NEXT: .LBB5_4: # %for.cond.cleanup
+; LA32-NEXT: xvstelm.d $xr0, $s2, 0, 1
+; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 64 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 68 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 72 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 80
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_extractf64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -80
+; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 4
+; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: addi.d $s1, $a0, 8
+; LA64-NEXT: blez $s0, .LBB5_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: xvrepli.b $xr0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB5_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: xvldrepl.d $xr0, $s1, 0
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1
+; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: bnez $s0, .LBB5_2
+; LA64-NEXT: b .LBB5_4
+; LA64-NEXT: .LBB5_3:
+; LA64-NEXT: xvrepli.b $xr0, 0
+; LA64-NEXT: .LBB5_4: # %for.cond.cleanup
+; LA64-NEXT: xvstelm.d $xr0, $s1, 0, 1
+; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 80
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.F, ptr %a, i64 %k, i32 1
+ %cmp = icmp sgt i64 %n, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi <4 x double> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ call void @f(ptr %a)
+ %e = load double, ptr %y
+ %ins0 = insertelement <4 x double> poison, double %e, i32 0
+ %v = shufflevector <4 x double> %ins0, <4 x double> poison, <4 x i32> zeroinitializer
+ %addv = fadd <4 x double> %v, %sum.0
+ %inc = add nuw nsw i64 %i.0, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.lcssa = phi <4 x double> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ %res = extractelement <4 x double> %sum.lcssa, i32 1
+ store double %res, ptr %y
+ ret void
+}
+
+declare void @f(ptr)
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
index d3c0da9..000c67ef 100644
--- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
@@ -1439,11 +1439,8 @@ Key: PSUBWrm: [ 0.00 0.00 ]
Key: PSUBWrr: [ 0.00 0.00 ]
Key: PSWAPDrm: [ 0.00 0.00 ]
Key: PSWAPDrr: [ 0.00 0.00 ]
-Key: PT: [ 0.00 0.00 ]
Key: PTCMMIMFP: [ 0.00 0.00 ]
Key: PTCMMRLFP: [ 0.00 0.00 ]
-Key: PTCONJTCMMIMFP: [ 0.00 0.00 ]
-Key: PTCONJTFP: [ 0.00 0.00 ]
Key: PTCVTROWD: [ 0.00 0.00 ]
Key: PTCVTROWPS: [ 0.00 0.00 ]
Key: PTDPBF: [ 0.00 0.00 ]
@@ -1471,20 +1468,11 @@ Key: PTILEMOVROWrre: [ 0.00 0.00 ]
Key: PTILEMOVROWrreV: [ 0.00 0.00 ]
Key: PTILEMOVROWrri: [ 0.00 0.00 ]
Key: PTILEMOVROWrriV: [ 0.00 0.00 ]
-Key: PTILEPAIRLOAD: [ 0.00 0.00 ]
-Key: PTILEPAIRSTORE: [ 0.00 0.00 ]
Key: PTILESTORED: [ 0.00 0.00 ]
Key: PTILESTOREDV: [ 0.00 0.00 ]
Key: PTILEZERO: [ 0.00 0.00 ]
Key: PTILEZEROV: [ 0.00 0.00 ]
Key: PTMMULTF: [ 0.00 0.00 ]
-Key: PTTCMMIMFP: [ 0.00 0.00 ]
-Key: PTTCMMRLFP: [ 0.00 0.00 ]
-Key: PTTDPBF: [ 0.00 0.00 ]
-Key: PTTDPFP: [ 0.00 0.00 ]
-Key: PTTMMULTF: [ 0.00 0.00 ]
-Key: PTTRANSPOSED: [ 0.00 0.00 ]
-Key: PTTRANSPOSEDV: [ 0.00 0.00 ]
Key: PTWRITE: [ 0.00 0.00 ]
Key: PTWRITEm: [ 0.00 0.00 ]
Key: PTWRITEr: [ 0.00 0.00 ]
@@ -1717,8 +1705,6 @@ Key: TAILJMPm: [ 0.00 0.00 ]
Key: TAILJMPr: [ 0.00 0.00 ]
Key: TCMMIMFP: [ 0.00 0.00 ]
Key: TCMMRLFP: [ 0.00 0.00 ]
-Key: TCONJTCMMIMFP: [ 0.00 0.00 ]
-Key: TCONJTFP: [ 0.00 0.00 ]
Key: TCRETURN_HIPE: [ 0.00 0.00 ]
Key: TCRETURN_WIN: [ 0.00 0.00 ]
Key: TCRETURN_WINmi: [ 0.00 0.00 ]
@@ -1764,12 +1750,6 @@ Key: TPAUSE: [ 0.00 0.00 ]
Key: TRAP: [ 0.00 0.00 ]
Key: TST_F: [ 0.00 0.00 ]
Key: TST_Fp: [ 0.00 0.00 ]
-Key: TTCMMIMFP: [ 0.00 0.00 ]
-Key: TTCMMRLFP: [ 0.00 0.00 ]
-Key: TTDPBF: [ 0.00 0.00 ]
-Key: TTDPFP: [ 0.00 0.00 ]
-Key: TTMMULTF: [ 0.00 0.00 ]
-Key: TTRANSPOSED: [ 0.00 0.00 ]
Key: TZCNT: [ 0.00 0.00 ]
Key: TZMSK: [ 0.00 0.00 ]
Key: UBSAN_UD: [ 0.00 0.00 ]
@@ -7034,7 +7014,6 @@ Key: PhyReg_VR256: [ 0.00 0.00 ]
Key: PhyReg_VR512: [ 0.00 0.00 ]
Key: PhyReg_VR512_0_15: [ 0.00 0.00 ]
Key: PhyReg_TILE: [ 0.00 0.00 ]
-Key: PhyReg_TILEPAIR: [ 0.00 0.00 ]
Key: VirtReg_GR8: [ 0.00 0.00 ]
Key: VirtReg_GRH8: [ 0.00 0.00 ]
Key: VirtReg_GR8_NOREX2: [ 0.00 0.00 ]
@@ -7170,4 +7149,3 @@ Key: VirtReg_VR256: [ 0.00 0.00 ]
Key: VirtReg_VR512: [ 0.00 0.00 ]
Key: VirtReg_VR512_0_15: [ 0.00 0.00 ]
Key: VirtReg_TILE: [ 0.00 0.00 ]
-Key: VirtReg_TILEPAIR: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
index c6e5508..bb72886 100644
--- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
@@ -1439,11 +1439,8 @@ Key: PSUBWrm: [ 0.00 0.00 ]
Key: PSUBWrr: [ 0.00 0.00 ]
Key: PSWAPDrm: [ 0.00 0.00 ]
Key: PSWAPDrr: [ 0.00 0.00 ]
-Key: PT: [ 0.00 0.00 ]
Key: PTCMMIMFP: [ 0.00 0.00 ]
Key: PTCMMRLFP: [ 0.00 0.00 ]
-Key: PTCONJTCMMIMFP: [ 0.00 0.00 ]
-Key: PTCONJTFP: [ 0.00 0.00 ]
Key: PTCVTROWD: [ 0.00 0.00 ]
Key: PTCVTROWPS: [ 0.00 0.00 ]
Key: PTDPBF: [ 0.00 0.00 ]
@@ -1471,20 +1468,11 @@ Key: PTILEMOVROWrre: [ 0.00 0.00 ]
Key: PTILEMOVROWrreV: [ 0.00 0.00 ]
Key: PTILEMOVROWrri: [ 0.00 0.00 ]
Key: PTILEMOVROWrriV: [ 0.00 0.00 ]
-Key: PTILEPAIRLOAD: [ 0.00 0.00 ]
-Key: PTILEPAIRSTORE: [ 0.00 0.00 ]
Key: PTILESTORED: [ 0.00 0.00 ]
Key: PTILESTOREDV: [ 0.00 0.00 ]
Key: PTILEZERO: [ 0.00 0.00 ]
Key: PTILEZEROV: [ 0.00 0.00 ]
Key: PTMMULTF: [ 0.00 0.00 ]
-Key: PTTCMMIMFP: [ 0.00 0.00 ]
-Key: PTTCMMRLFP: [ 0.00 0.00 ]
-Key: PTTDPBF: [ 0.00 0.00 ]
-Key: PTTDPFP: [ 0.00 0.00 ]
-Key: PTTMMULTF: [ 0.00 0.00 ]
-Key: PTTRANSPOSED: [ 0.00 0.00 ]
-Key: PTTRANSPOSEDV: [ 0.00 0.00 ]
Key: PTWRITE: [ 0.00 0.00 ]
Key: PTWRITEm: [ 0.00 0.00 ]
Key: PTWRITEr: [ 0.00 0.00 ]
@@ -1717,8 +1705,6 @@ Key: TAILJMPm: [ 0.00 0.00 ]
Key: TAILJMPr: [ 0.00 0.00 ]
Key: TCMMIMFP: [ 0.00 0.00 ]
Key: TCMMRLFP: [ 0.00 0.00 ]
-Key: TCONJTCMMIMFP: [ 0.00 0.00 ]
-Key: TCONJTFP: [ 0.00 0.00 ]
Key: TCRETURN_HIPE: [ 0.00 0.00 ]
Key: TCRETURN_WIN: [ 0.00 0.00 ]
Key: TCRETURN_WINmi: [ 0.00 0.00 ]
@@ -1764,12 +1750,6 @@ Key: TPAUSE: [ 0.00 0.00 ]
Key: TRAP: [ 0.00 0.00 ]
Key: TST_F: [ 0.00 0.00 ]
Key: TST_Fp: [ 0.00 0.00 ]
-Key: TTCMMIMFP: [ 0.00 0.00 ]
-Key: TTCMMRLFP: [ 0.00 0.00 ]
-Key: TTDPBF: [ 0.00 0.00 ]
-Key: TTDPFP: [ 0.00 0.00 ]
-Key: TTMMULTF: [ 0.00 0.00 ]
-Key: TTRANSPOSED: [ 0.00 0.00 ]
Key: TZCNT: [ 0.00 0.00 ]
Key: TZMSK: [ 0.00 0.00 ]
Key: UBSAN_UD: [ 0.00 0.00 ]
@@ -7034,7 +7014,6 @@ Key: PhyReg_VR256: [ 0.00 0.00 ]
Key: PhyReg_VR512: [ 0.00 0.00 ]
Key: PhyReg_VR512_0_15: [ 0.00 0.00 ]
Key: PhyReg_TILE: [ 0.00 0.00 ]
-Key: PhyReg_TILEPAIR: [ 0.00 0.00 ]
Key: VirtReg_GR8: [ 0.00 0.00 ]
Key: VirtReg_GRH8: [ 0.00 0.00 ]
Key: VirtReg_GR8_NOREX2: [ 0.00 0.00 ]
@@ -7170,4 +7149,3 @@ Key: VirtReg_VR256: [ 0.00 0.00 ]
Key: VirtReg_VR512: [ 0.00 0.00 ]
Key: VirtReg_VR512_0_15: [ 0.00 0.00 ]
Key: VirtReg_TILE: [ 0.00 0.00 ]
-Key: VirtReg_TILEPAIR: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
index bd8d882..9dd402d 100644
--- a/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
+++ b/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll
@@ -26,7 +26,7 @@
; Also, the first eviction problem is significantly less than 300 instructions. Check
; that there is a zero value.
; Note: we're regex-ing some of the opcodes to avoid test flakiness.
-; CHECK: instructions: 20,{{([0-9]{4})}},1{{([0-9]{3})}},2{{([0-9]{3})}},{{.*}},0,
+; CHECK: instructions: 20,{{([0-9]{4})}},{{([0-9]{4})}},{{([0-9]{4})}},{{.*}},0,
; Only the candidate virtreg and the 10th LR are included in this problem. Make
; sure the other LRs have values of zero. There are 2700 0s followed by some 1s.
; There's a limit to how many repetitions can be matched.
diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll
index b5c43fd2..d653895 100644
--- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll
+++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-1cta.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %}
+; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
+; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll
index 57342dc..5de1ac8 100644
--- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll
+++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-2cta.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %}
+; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
+; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll
index 6296d5a..2f5c1ef 100644
--- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll
+++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-gather4.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %}
+; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
+; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll
index e5ae387..a2b2c2f 100644
--- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll
+++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %}
+; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
+; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll
index 7d04ada..e4c48dd 100644
--- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll
+++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-im2colw128.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86| %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100a %}
+; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
+; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll
index b0fe77c..727bb3b 100644
--- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll
+++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
+; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
target triple = "nvptx64-nvidia-cuda"
@@ -29,10 +33,10 @@ define void @cp_async_bulk_tensor_g2s_tile_1d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_1d_param_1];
; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_g2s_tile_1d_param_2];
; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_g2s_tile_1d_param_3];
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1}], [%rd2];
+; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_1d_param_4];
; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_1d_param_5];
+; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1}], [%rd2];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1}], [%rd2], %rd4;
-; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_1d_param_4];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1}], [%rd2], %rs1;
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1}], [%rd2], %rs1, %rd4;
; CHECK-PTX64-NEXT: ret;
@@ -48,10 +52,10 @@ define void @cp_async_bulk_tensor_g2s_tile_1d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_tile_1d_param_1];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_g2s_tile_1d_param_2];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_1d_param_3];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3}], [%r2];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_1d_param_4];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_1d_param_5];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3}], [%r2];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3}], [%r2], %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_1d_param_4];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3}], [%r2], %rs1;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3}], [%r2], %rs1, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
@@ -79,10 +83,10 @@ define void @cp_async_bulk_tensor_g2s_tile_2d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_g2s_tile_2d_param_2];
; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_g2s_tile_2d_param_3];
; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_tile_2d_param_4];
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2}], [%rd2];
+; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_2d_param_5];
; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_2d_param_6];
+; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2}], [%rd2];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2}], [%rd2], %rd4;
-; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_2d_param_5];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2}], [%rd2], %rs1;
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2}], [%rd2], %rs1, %rd4;
; CHECK-PTX64-NEXT: ret;
@@ -99,10 +103,10 @@ define void @cp_async_bulk_tensor_g2s_tile_2d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_g2s_tile_2d_param_2];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_2d_param_3];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_2d_param_4];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4}], [%r2];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_2d_param_5];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_2d_param_6];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4}], [%r2];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4}], [%r2], %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_2d_param_5];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4}], [%r2], %rs1;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4}], [%r2], %rs1, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
@@ -131,10 +135,10 @@ define void @cp_async_bulk_tensor_g2s_tile_3d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_g2s_tile_3d_param_3];
; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_tile_3d_param_4];
; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_3d_param_5];
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2];
+; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_3d_param_6];
; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_3d_param_7];
+; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], %rd4;
-; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_3d_param_6];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], %rs1;
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], %rs1, %rd4;
; CHECK-PTX64-NEXT: ret;
@@ -152,10 +156,10 @@ define void @cp_async_bulk_tensor_g2s_tile_3d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_3d_param_3];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_3d_param_4];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_tile_3d_param_5];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5}], [%r2];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_3d_param_6];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_3d_param_7];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5}], [%r2];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_3d_param_6];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], %rs1;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], %rs1, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
@@ -185,10 +189,10 @@ define void @cp_async_bulk_tensor_g2s_tile_4d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_tile_4d_param_4];
; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_4d_param_5];
; CHECK-PTX64-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_4d_param_6];
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2];
+; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_4d_param_7];
; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_4d_param_8];
+; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], %rd4;
-; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_4d_param_7];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], %rs1;
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], %rs1, %rd4;
; CHECK-PTX64-NEXT: ret;
@@ -207,10 +211,10 @@ define void @cp_async_bulk_tensor_g2s_tile_4d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_4d_param_4];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_tile_4d_param_5];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r6, [cp_async_bulk_tensor_g2s_tile_4d_param_6];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_4d_param_7];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_4d_param_8];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_4d_param_7];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], %rs1;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], %rs1, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
@@ -241,10 +245,10 @@ define void @cp_async_bulk_tensor_g2s_tile_5d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_tile_5d_param_5];
; CHECK-PTX64-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_tile_5d_param_6];
; CHECK-PTX64-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_tile_5d_param_7];
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2];
+; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_5d_param_8];
; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_tile_5d_param_9];
+; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], %rd4;
-; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_5d_param_8];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], %rs1;
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], %rs1, %rd4;
; CHECK-PTX64-NEXT: ret;
@@ -264,10 +268,10 @@ define void @cp_async_bulk_tensor_g2s_tile_5d(ptr addrspace(7) %d, ptr addrspace
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_tile_5d_param_5];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r6, [cp_async_bulk_tensor_g2s_tile_5d_param_6];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r7, [cp_async_bulk_tensor_g2s_tile_5d_param_7];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_5d_param_8];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_tile_5d_param_9];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_tile_5d_param_8];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], %rs1;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], %rs1, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
@@ -297,10 +301,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_3d(ptr addrspace(7) %d, ptr addrspa
; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_g2s_im2col_3d_param_4];
; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_g2s_im2col_3d_param_5];
; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_3d_param_6];
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1};
+; CHECK-PTX64-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_3d_param_7];
; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_im2col_3d_param_8];
+; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1};
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1}, %rd4;
-; CHECK-PTX64-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_3d_param_7];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1}, %rs2;
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3}], [%rd2], {%rs1}, %rs2, %rd4;
; CHECK-PTX64-NEXT: ret;
@@ -319,10 +323,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_3d(ptr addrspace(7) %d, ptr addrspa
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_im2col_3d_param_4];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_g2s_im2col_3d_param_5];
; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_3d_param_6];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1};
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_3d_param_7];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_im2col_3d_param_8];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1};
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1}, %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_3d_param_7];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1}, %rs2;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5}], [%r2], {%rs1}, %rs2, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
@@ -354,10 +358,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_4d(ptr addrspace(7) %d, ptr addrspa
; CHECK-PTX64-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_g2s_im2col_4d_param_6];
; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_4d_param_7];
; CHECK-PTX64-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_4d_param_8];
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2};
+; CHECK-PTX64-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_4d_param_9];
; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_im2col_4d_param_10];
+; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2};
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2}, %rd4;
-; CHECK-PTX64-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_4d_param_9];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2}, %rs3;
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4}], [%rd2], {%rs1, %rs2}, %rs3, %rd4;
; CHECK-PTX64-NEXT: ret;
@@ -378,10 +382,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_4d(ptr addrspace(7) %d, ptr addrspa
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r6, [cp_async_bulk_tensor_g2s_im2col_4d_param_6];
; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_4d_param_7];
; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_4d_param_8];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2};
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_4d_param_9];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_im2col_4d_param_10];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2};
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2}, %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_4d_param_9];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2}, %rs3;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6}], [%r2], {%rs1, %rs2}, %rs3, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
@@ -415,10 +419,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_5d(ptr addrspace(7) %d, ptr addrspa
; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_5d_param_8];
; CHECK-PTX64-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_5d_param_9];
; CHECK-PTX64-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_5d_param_10];
-; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3};
+; CHECK-PTX64-NEXT: ld.param.b16 %rs4, [cp_async_bulk_tensor_g2s_im2col_5d_param_11];
; CHECK-PTX64-NEXT: ld.param.b64 %rd4, [cp_async_bulk_tensor_g2s_im2col_5d_param_12];
+; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3};
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3}, %rd4;
-; CHECK-PTX64-NEXT: ld.param.b16 %rs4, [cp_async_bulk_tensor_g2s_im2col_5d_param_11];
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3}, %rs4;
; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%rd1], [%rd3, {%r1, %r2, %r3, %r4, %r5}], [%rd2], {%rs1, %rs2, %rs3}, %rs4, %rd4;
; CHECK-PTX64-NEXT: ret;
@@ -441,10 +445,10 @@ define void @cp_async_bulk_tensor_g2s_im2col_5d(ptr addrspace(7) %d, ptr addrspa
; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_g2s_im2col_5d_param_8];
; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_g2s_im2col_5d_param_9];
; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs3, [cp_async_bulk_tensor_g2s_im2col_5d_param_10];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3};
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs4, [cp_async_bulk_tensor_g2s_im2col_5d_param_11];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_g2s_im2col_5d_param_12];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3};
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3}, %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs4, [cp_async_bulk_tensor_g2s_im2col_5d_param_11];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3}, %rs4;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.shared::cluster.global.im2col.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r1], [%rd1, {%r3, %r4, %r5, %r6, %r7}], [%r2], {%rs1, %rs2, %rs3}, %rs4, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/f16-ex2.ll b/llvm/test/CodeGen/NVPTX/f16-ex2.ll
index ee79f9d..af3fe67 100644
--- a/llvm/test/CodeGen/NVPTX/f16-ex2.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-ex2.ll
@@ -1,12 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
-; RUN: %if ptxas-sm_75 && ptxas-isa-7.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-FP16 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
target triple = "nvptx64-nvidia-cuda"
declare half @llvm.nvvm.ex2.approx.f16(half)
-declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)
+declare <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half>)
+declare bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat)
+declare <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat>)
-; CHECK-LABEL: ex2_half
define half @ex2_half(half %0) {
; CHECK-FP16-LABEL: ex2_half(
; CHECK-FP16: {
@@ -21,7 +22,6 @@ define half @ex2_half(half %0) {
ret half %res
}
-; CHECK-LABEL: ex2_2xhalf
define <2 x half> @ex2_2xhalf(<2 x half> %0) {
; CHECK-FP16-LABEL: ex2_2xhalf(
; CHECK-FP16: {
@@ -32,6 +32,34 @@ define <2 x half> @ex2_2xhalf(<2 x half> %0) {
; CHECK-FP16-NEXT: ex2.approx.f16x2 %r2, %r1;
; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-FP16-NEXT: ret;
- %res = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %0)
+ %res = call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %0)
ret <2 x half> %res
}
+
+define bfloat @ex2_bfloat(bfloat %0) {
+; CHECK-FP16-LABEL: ex2_bfloat(
+; CHECK-FP16: {
+; CHECK-FP16-NEXT: .reg .b16 %rs<3>;
+; CHECK-FP16-EMPTY:
+; CHECK-FP16-NEXT: // %bb.0:
+; CHECK-FP16-NEXT: ld.param.b16 %rs1, [ex2_bfloat_param_0];
+; CHECK-FP16-NEXT: ex2.approx.ftz.bf16 %rs2, %rs1;
+; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs2;
+; CHECK-FP16-NEXT: ret;
+ %res = call bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat %0)
+ ret bfloat %res
+}
+
+define <2 x bfloat> @ex2_2xbfloat(<2 x bfloat> %0) {
+; CHECK-FP16-LABEL: ex2_2xbfloat(
+; CHECK-FP16: {
+; CHECK-FP16-NEXT: .reg .b32 %r<3>;
+; CHECK-FP16-EMPTY:
+; CHECK-FP16-NEXT: // %bb.0:
+; CHECK-FP16-NEXT: ld.param.b32 %r1, [ex2_2xbfloat_param_0];
+; CHECK-FP16-NEXT: ex2.approx.ftz.bf16x2 %r2, %r1;
+; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-FP16-NEXT: ret;
+ %res = call <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat> %0)
+ ret <2 x bfloat> %res
+}
diff --git a/llvm/test/CodeGen/NVPTX/f32-ex2.ll b/llvm/test/CodeGen/NVPTX/f32-ex2.ll
index 796d80d..97b9d35 100644
--- a/llvm/test/CodeGen/NVPTX/f32-ex2.ll
+++ b/llvm/test/CodeGen/NVPTX/f32-ex2.ll
@@ -3,7 +3,8 @@
; RUN: %if ptxas-sm_50 && ptxas-isa-3.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_50 -mattr=+ptx32 | %ptxas-verify -arch=sm_50 %}
target triple = "nvptx-nvidia-cuda"
-declare float @llvm.nvvm.ex2.approx.f(float)
+declare float @llvm.nvvm.ex2.approx.f32(float)
+declare float @llvm.nvvm.ex2.approx.ftz.f32(float)
; CHECK-LABEL: ex2_float
define float @ex2_float(float %0) {
@@ -16,7 +17,7 @@ define float @ex2_float(float %0) {
; CHECK-NEXT: ex2.approx.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %res = call float @llvm.nvvm.ex2.approx.f(float %0)
+ %res = call float @llvm.nvvm.ex2.approx.f32(float %0)
ret float %res
}
@@ -31,6 +32,6 @@ define float @ex2_float_ftz(float %0) {
; CHECK-NEXT: ex2.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %res = call float @llvm.nvvm.ex2.approx.ftz.f(float %0)
+ %res = call float @llvm.nvvm.ex2.approx.ftz.f32(float %0)
ret float %res
}
diff --git a/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll b/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
index 00a77f9..530169f 100644
--- a/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
@@ -212,37 +212,33 @@ define hidden void @testCaller(i1 %incond) local_unnamed_addr align 2 nounwind {
; CHECK-NEXT: std r30, 48(r1) # 8-byte Folded Spill
; CHECK-NEXT: andi. r3, r3, 1
; CHECK-NEXT: li r3, -1
+; CHECK-NEXT: li r4, 0
; CHECK-NEXT: li r30, 0
; CHECK-NEXT: crmove 4*cr2+lt, gt
; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill
; CHECK-NEXT: b .LBB3_2
-; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB3_1: # %if.end116
; CHECK-NEXT: #
; CHECK-NEXT: bl callee
; CHECK-NEXT: nop
; CHECK-NEXT: mr r3, r29
-; CHECK-NEXT: .LBB3_2: # %cond.end.i.i
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB3_3 Depth 2
-; CHECK-NEXT: lwz r29, 0(r3)
-; CHECK-NEXT: li r5, 0
-; CHECK-NEXT: extsw r4, r29
-; CHECK-NEXT: .p2align 5
-; CHECK-NEXT: .LBB3_3: # %while.body5.i
-; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: addi r5, r5, -1
-; CHECK-NEXT: cmpwi r5, 0
-; CHECK-NEXT: bgt cr0, .LBB3_3
-; CHECK-NEXT: # %bb.4: # %while.cond12.preheader.i
+; CHECK-NEXT: li r4, 0
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB3_2: # %while.body5.i
; CHECK-NEXT: #
+; CHECK-NEXT: addi r4, r4, -1
+; CHECK-NEXT: cmpwi r4, 0
+; CHECK-NEXT: bgt cr0, .LBB3_2
+; CHECK-NEXT: # %bb.3: # %while.cond12.preheader.i
+; CHECK-NEXT: #
+; CHECK-NEXT: lwz r29, 0(r3)
; CHECK-NEXT: bc 12, 4*cr2+lt, .LBB3_1
-; CHECK-NEXT: # %bb.5: # %for.cond99.preheader
+; CHECK-NEXT: # %bb.4: # %for.cond99.preheader
; CHECK-NEXT: #
+; CHECK-NEXT: extsw r4, r29
; CHECK-NEXT: ld r5, 0(r3)
-; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: stw r3, 0(r3)
+; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: stwx r30, r5, r4
; CHECK-NEXT: b .LBB3_1
;
@@ -256,37 +252,33 @@ define hidden void @testCaller(i1 %incond) local_unnamed_addr align 2 nounwind {
; CHECK-BE-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: andi. r3, r3, 1
; CHECK-BE-NEXT: li r3, -1
+; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: li r30, 0
; CHECK-BE-NEXT: crmove 4*cr2+lt, gt
; CHECK-BE-NEXT: std r29, 56(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: b .LBB3_2
-; CHECK-BE-NEXT: .p2align 4
; CHECK-BE-NEXT: .LBB3_1: # %if.end116
; CHECK-BE-NEXT: #
; CHECK-BE-NEXT: bl callee
; CHECK-BE-NEXT: nop
; CHECK-BE-NEXT: mr r3, r29
-; CHECK-BE-NEXT: .LBB3_2: # %cond.end.i.i
-; CHECK-BE-NEXT: # =>This Loop Header: Depth=1
-; CHECK-BE-NEXT: # Child Loop BB3_3 Depth 2
-; CHECK-BE-NEXT: lwz r29, 0(r3)
-; CHECK-BE-NEXT: li r5, 0
-; CHECK-BE-NEXT: extsw r4, r29
-; CHECK-BE-NEXT: .p2align 5
-; CHECK-BE-NEXT: .LBB3_3: # %while.body5.i
-; CHECK-BE-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-BE-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-BE-NEXT: addi r5, r5, -1
-; CHECK-BE-NEXT: cmpwi r5, 0
-; CHECK-BE-NEXT: bgt cr0, .LBB3_3
-; CHECK-BE-NEXT: # %bb.4: # %while.cond12.preheader.i
+; CHECK-BE-NEXT: li r4, 0
+; CHECK-BE-NEXT: .p2align 4
+; CHECK-BE-NEXT: .LBB3_2: # %while.body5.i
+; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: addi r4, r4, -1
+; CHECK-BE-NEXT: cmpwi r4, 0
+; CHECK-BE-NEXT: bgt cr0, .LBB3_2
+; CHECK-BE-NEXT: # %bb.3: # %while.cond12.preheader.i
; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: lwz r29, 0(r3)
; CHECK-BE-NEXT: bc 12, 4*cr2+lt, .LBB3_1
-; CHECK-BE-NEXT: # %bb.5: # %for.cond99.preheader
+; CHECK-BE-NEXT: # %bb.4: # %for.cond99.preheader
; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: extsw r4, r29
; CHECK-BE-NEXT: ld r5, 0(r3)
-; CHECK-BE-NEXT: sldi r4, r4, 2
; CHECK-BE-NEXT: stw r3, 0(r3)
+; CHECK-BE-NEXT: sldi r4, r4, 2
; CHECK-BE-NEXT: stwx r30, r5, r4
; CHECK-BE-NEXT: b .LBB3_1
;
@@ -300,32 +292,28 @@ define hidden void @testCaller(i1 %incond) local_unnamed_addr align 2 nounwind {
; CHECK-P9-NEXT: std r0, 80(r1)
; CHECK-P9-NEXT: std r30, 48(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: li r3, -1
+; CHECK-P9-NEXT: li r4, 0
; CHECK-P9-NEXT: li r30, 0
; CHECK-P9-NEXT: std r29, 40(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: crmove 4*cr2+lt, gt
; CHECK-P9-NEXT: b .LBB3_2
-; CHECK-P9-NEXT: .p2align 4
; CHECK-P9-NEXT: .LBB3_1: # %if.end116
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: bl callee
; CHECK-P9-NEXT: nop
; CHECK-P9-NEXT: mr r3, r29
-; CHECK-P9-NEXT: .LBB3_2: # %cond.end.i.i
-; CHECK-P9-NEXT: # =>This Loop Header: Depth=1
-; CHECK-P9-NEXT: # Child Loop BB3_3 Depth 2
-; CHECK-P9-NEXT: lwz r29, 0(r3)
; CHECK-P9-NEXT: li r4, 0
-; CHECK-P9-NEXT: .p2align 5
-; CHECK-P9-NEXT: .LBB3_3: # %while.body5.i
-; CHECK-P9-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-P9-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-P9-NEXT: .p2align 4
+; CHECK-P9-NEXT: .LBB3_2: # %while.body5.i
+; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: addi r4, r4, -1
; CHECK-P9-NEXT: cmpwi r4, 0
-; CHECK-P9-NEXT: bgt cr0, .LBB3_3
-; CHECK-P9-NEXT: # %bb.4: # %while.cond12.preheader.i
+; CHECK-P9-NEXT: bgt cr0, .LBB3_2
+; CHECK-P9-NEXT: # %bb.3: # %while.cond12.preheader.i
; CHECK-P9-NEXT: #
+; CHECK-P9-NEXT: lwz r29, 0(r3)
; CHECK-P9-NEXT: bc 12, 4*cr2+lt, .LBB3_1
-; CHECK-P9-NEXT: # %bb.5: # %for.cond99.preheader
+; CHECK-P9-NEXT: # %bb.4: # %for.cond99.preheader
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: ld r4, 0(r3)
; CHECK-P9-NEXT: extswsli r5, r29, 2
@@ -343,32 +331,28 @@ define hidden void @testCaller(i1 %incond) local_unnamed_addr align 2 nounwind {
; CHECK-P9-BE-NEXT: std r0, 96(r1)
; CHECK-P9-BE-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P9-BE-NEXT: li r3, -1
+; CHECK-P9-BE-NEXT: li r4, 0
; CHECK-P9-BE-NEXT: li r30, 0
; CHECK-P9-BE-NEXT: std r29, 56(r1) # 8-byte Folded Spill
; CHECK-P9-BE-NEXT: crmove 4*cr2+lt, gt
; CHECK-P9-BE-NEXT: b .LBB3_2
-; CHECK-P9-BE-NEXT: .p2align 4
; CHECK-P9-BE-NEXT: .LBB3_1: # %if.end116
; CHECK-P9-BE-NEXT: #
; CHECK-P9-BE-NEXT: bl callee
; CHECK-P9-BE-NEXT: nop
; CHECK-P9-BE-NEXT: mr r3, r29
-; CHECK-P9-BE-NEXT: .LBB3_2: # %cond.end.i.i
-; CHECK-P9-BE-NEXT: # =>This Loop Header: Depth=1
-; CHECK-P9-BE-NEXT: # Child Loop BB3_3 Depth 2
-; CHECK-P9-BE-NEXT: lwz r29, 0(r3)
; CHECK-P9-BE-NEXT: li r4, 0
-; CHECK-P9-BE-NEXT: .p2align 5
-; CHECK-P9-BE-NEXT: .LBB3_3: # %while.body5.i
-; CHECK-P9-BE-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-P9-BE-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-P9-BE-NEXT: .p2align 4
+; CHECK-P9-BE-NEXT: .LBB3_2: # %while.body5.i
+; CHECK-P9-BE-NEXT: #
; CHECK-P9-BE-NEXT: addi r4, r4, -1
; CHECK-P9-BE-NEXT: cmpwi r4, 0
-; CHECK-P9-BE-NEXT: bgt cr0, .LBB3_3
-; CHECK-P9-BE-NEXT: # %bb.4: # %while.cond12.preheader.i
+; CHECK-P9-BE-NEXT: bgt cr0, .LBB3_2
+; CHECK-P9-BE-NEXT: # %bb.3: # %while.cond12.preheader.i
; CHECK-P9-BE-NEXT: #
+; CHECK-P9-BE-NEXT: lwz r29, 0(r3)
; CHECK-P9-BE-NEXT: bc 12, 4*cr2+lt, .LBB3_1
-; CHECK-P9-BE-NEXT: # %bb.5: # %for.cond99.preheader
+; CHECK-P9-BE-NEXT: # %bb.4: # %for.cond99.preheader
; CHECK-P9-BE-NEXT: #
; CHECK-P9-BE-NEXT: ld r4, 0(r3)
; CHECK-P9-BE-NEXT: extswsli r5, r29, 2
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll
new file mode 100644
index 0000000..785d9fc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll
@@ -0,0 +1,1575 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN: -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN: -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare void @llvm.riscv.vse.nxv1i64(
+ <vscale x 1 x i64>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1i64(
+ <vscale x 1 x i64>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+define void @intrinsic_vse_allonesmask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_allonesmask_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ <vscale x 1 x i1> splat (i1 true),
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2i64(
+ <vscale x 2 x i64>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2i64(
+ <vscale x 2 x i64> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2i64(
+ <vscale x 2 x i64>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2i64(
+ <vscale x 2 x i64> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4i64(
+ <vscale x 4 x i64>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4i64(
+ <vscale x 4 x i64> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4i64(
+ <vscale x 4 x i64>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4i64(
+ <vscale x 4 x i64> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8i64(
+ <vscale x 8 x i64>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8i64(
+ <vscale x 8 x i64> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8i64(
+ <vscale x 8 x i64>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8i64(
+ <vscale x 8 x i64> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1f64(
+ <vscale x 1 x double>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1f64(
+ <vscale x 1 x double> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1f64(
+ <vscale x 1 x double>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1f64(
+ <vscale x 1 x double> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2f64(
+ <vscale x 2 x double>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2f64(
+ <vscale x 2 x double> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2f64(
+ <vscale x 2 x double>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2f64(
+ <vscale x 2 x double> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4f64(
+ <vscale x 4 x double>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4f64(
+ <vscale x 4 x double> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4f64(
+ <vscale x 4 x double>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4f64(
+ <vscale x 4 x double> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8f64(
+ <vscale x 8 x double>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8f64(
+ <vscale x 8 x double> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8f64(
+ <vscale x 8 x double>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8f64(
+ <vscale x 8 x double> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1i32(
+ <vscale x 1 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1i32(
+ <vscale x 1 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1i32(
+ <vscale x 1 x i32>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i32(
+ <vscale x 1 x i32> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2i32(
+ <vscale x 2 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2i32(
+ <vscale x 2 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2i32(
+ <vscale x 2 x i32>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2i32(
+ <vscale x 2 x i32> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4i32(
+ <vscale x 4 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4i32(
+ <vscale x 4 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4i32(
+ <vscale x 4 x i32>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4i32(
+ <vscale x 4 x i32> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8i32(
+ <vscale x 8 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8i32(
+ <vscale x 8 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8i32(
+ <vscale x 8 x i32>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8i32(
+ <vscale x 8 x i32> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16i32(
+ <vscale x 16 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16i32(
+ <vscale x 16 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16i32(
+ <vscale x 16 x i32>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16i32(
+ <vscale x 16 x i32> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1f32(
+ <vscale x 1 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1f32(
+ <vscale x 1 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1f32(
+ <vscale x 1 x float>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1f32(
+ <vscale x 1 x float> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2f32(
+ <vscale x 2 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2f32(
+ <vscale x 2 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2f32(
+ <vscale x 2 x float>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2f32(
+ <vscale x 2 x float> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4f32(
+ <vscale x 4 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4f32(
+ <vscale x 4 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4f32(
+ <vscale x 4 x float>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4f32(
+ <vscale x 4 x float> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8f32(
+ <vscale x 8 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8f32(
+ <vscale x 8 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8f32(
+ <vscale x 8 x float>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8f32(
+ <vscale x 8 x float> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16f32(
+ <vscale x 16 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16f32(
+ <vscale x 16 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16f32(
+ <vscale x 16 x float>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16f32(
+ <vscale x 16 x float> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1i16(
+ <vscale x 1 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1i16(
+ <vscale x 1 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1i16(
+ <vscale x 1 x i16>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i16(
+ <vscale x 1 x i16> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2i16(
+ <vscale x 2 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2i16(
+ <vscale x 2 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2i16(
+ <vscale x 2 x i16>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2i16(
+ <vscale x 2 x i16> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4i16(
+ <vscale x 4 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4i16(
+ <vscale x 4 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4i16(
+ <vscale x 4 x i16>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4i16(
+ <vscale x 4 x i16> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8i16(
+ <vscale x 8 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8i16(
+ <vscale x 8 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8i16(
+ <vscale x 8 x i16>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8i16(
+ <vscale x 8 x i16> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16i16(
+ <vscale x 16 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16i16(
+ <vscale x 16 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16i16(
+ <vscale x 16 x i16>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16i16(
+ <vscale x 16 x i16> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv32i16(
+ <vscale x 32 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv32i16(
+ <vscale x 32 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv32i16(
+ <vscale x 32 x i16>,
+ ptr,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv32i16(
+ <vscale x 32 x i16> %0,
+ ptr %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1f16(
+ <vscale x 1 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1f16(
+ <vscale x 1 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1f16(
+ <vscale x 1 x half>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1f16(
+ <vscale x 1 x half> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2f16(
+ <vscale x 2 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2f16(
+ <vscale x 2 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2f16(
+ <vscale x 2 x half>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2f16(
+ <vscale x 2 x half> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4f16(
+ <vscale x 4 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4f16(
+ <vscale x 4 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4f16(
+ <vscale x 4 x half>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4f16(
+ <vscale x 4 x half> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8f16(
+ <vscale x 8 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8f16(
+ <vscale x 8 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8f16(
+ <vscale x 8 x half>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8f16(
+ <vscale x 8 x half> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16f16(
+ <vscale x 16 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16f16(
+ <vscale x 16 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16f16(
+ <vscale x 16 x half>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16f16(
+ <vscale x 16 x half> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv32f16(
+ <vscale x 32 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv32f16(
+ <vscale x 32 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv32f16(
+ <vscale x 32 x half>,
+ ptr,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv32f16(
+ <vscale x 32 x half> %0,
+ ptr %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1i8(
+ <vscale x 1 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1i8(
+ <vscale x 1 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1i8(
+ <vscale x 1 x i8>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i8(
+ <vscale x 1 x i8> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2i8(
+ <vscale x 2 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2i8(
+ <vscale x 2 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2i8(
+ <vscale x 2 x i8>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2i8(
+ <vscale x 2 x i8> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4i8(
+ <vscale x 4 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4i8(
+ <vscale x 4 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4i8(
+ <vscale x 4 x i8>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4i8(
+ <vscale x 4 x i8> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8i8(
+ <vscale x 8 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8i8(
+ <vscale x 8 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8i8(
+ <vscale x 8 x i8>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8i8(
+ <vscale x 8 x i8> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16i8(
+ <vscale x 16 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16i8(
+ <vscale x 16 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16i8(
+ <vscale x 16 x i8>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16i8(
+ <vscale x 16 x i8> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv32i8(
+ <vscale x 32 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv32i8(
+ <vscale x 32 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv32i8(
+ <vscale x 32 x i8>,
+ ptr,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv32i8(
+ <vscale x 32 x i8> %0,
+ ptr %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv64i8(
+ <vscale x 64 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv64i8(
+ <vscale x 64 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv64i8(
+ <vscale x 64 x i8>,
+ ptr,
+ <vscale x 64 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv64i8(
+ <vscale x 64 x i8> %0,
+ ptr %1,
+ <vscale x 64 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll
new file mode 100644
index 0000000..5237536
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -global-isel -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -global-isel -verify-machineinstrs | FileCheck %s
+
+declare void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv1i1(<vscale x 1 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv1i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv2i1(<vscale x 2 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv2i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv4i1(<vscale x 4 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv4i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv8i1(<vscale x 8 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv8i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv16i1(<vscale x 16 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv16i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv32i1(<vscale x 32 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv32i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv64i1(<vscale x 64 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv64i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16(
+ <vscale x 1 x i16>,
+ <vscale x 1 x i16>,
+ iXLen);
+
+; Make sure we can use the vsetvli from the producing instruction.
+define void @test_vsetvli_i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, ptr %2, iXLen %3) nounwind {
+; CHECK-LABEL: test_vsetvli_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vsm.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16(
+ <vscale x 1 x i16> %0,
+ <vscale x 1 x i16> %1,
+ iXLen %3)
+ call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, ptr %2, iXLen %3)
+ ret void
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i32>,
+ iXLen);
+
+define void @test_vsetvli_i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, ptr %2, iXLen %3) nounwind {
+; CHECK-LABEL: test_vsetvli_i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vsm.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32(
+ <vscale x 1 x i32> %0,
+ <vscale x 1 x i32> %1,
+ iXLen %3)
+ call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, ptr %2, iXLen %3)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll
new file mode 100644
index 0000000..b7609ff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll
@@ -0,0 +1,1724 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN: -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN: -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare void @llvm.riscv.vsse.nxv1i64(
+ <vscale x 1 x i64>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1i64(
+ <vscale x 1 x i64>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+define void @intrinsic_vsse_allonesmask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_allonesmask_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> splat (i1 true),
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2i64(
+ <vscale x 2 x i64>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2i64(
+ <vscale x 2 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2i64(
+ <vscale x 2 x i64>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2i64(
+ <vscale x 2 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4i64(
+ <vscale x 4 x i64>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4i64(
+ <vscale x 4 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4i64(
+ <vscale x 4 x i64>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4i64(
+ <vscale x 4 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8i64(
+ <vscale x 8 x i64>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8i64(
+ <vscale x 8 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8i64(
+ <vscale x 8 x i64>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8i64(
+ <vscale x 8 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1f64(
+ <vscale x 1 x double>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1f64(
+ <vscale x 1 x double> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1f64(
+ <vscale x 1 x double>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1f64(
+ <vscale x 1 x double> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2f64(
+ <vscale x 2 x double>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2f64(
+ <vscale x 2 x double> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2f64(
+ <vscale x 2 x double>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2f64(
+ <vscale x 2 x double> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4f64(
+ <vscale x 4 x double>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4f64(
+ <vscale x 4 x double> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4f64(
+ <vscale x 4 x double>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4f64(
+ <vscale x 4 x double> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8f64(
+ <vscale x 8 x double>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8f64(
+ <vscale x 8 x double> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8f64(
+ <vscale x 8 x double>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8f64(
+ <vscale x 8 x double> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1i32(
+ <vscale x 1 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1i32(
+ <vscale x 1 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1i32(
+ <vscale x 1 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i32(
+ <vscale x 1 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2i32(
+ <vscale x 2 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2i32(
+ <vscale x 2 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2i32(
+ <vscale x 2 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2i32(
+ <vscale x 2 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4i32(
+ <vscale x 4 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4i32(
+ <vscale x 4 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4i32(
+ <vscale x 4 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4i32(
+ <vscale x 4 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8i32(
+ <vscale x 8 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8i32(
+ <vscale x 8 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8i32(
+ <vscale x 8 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8i32(
+ <vscale x 8 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16i32(
+ <vscale x 16 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16i32(
+ <vscale x 16 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16i32(
+ <vscale x 16 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16i32(
+ <vscale x 16 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1f32(
+ <vscale x 1 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1f32(
+ <vscale x 1 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1f32(
+ <vscale x 1 x float>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1f32(
+ <vscale x 1 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2f32(
+ <vscale x 2 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2f32(
+ <vscale x 2 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2f32(
+ <vscale x 2 x float>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2f32(
+ <vscale x 2 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4f32(
+ <vscale x 4 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4f32(
+ <vscale x 4 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4f32(
+ <vscale x 4 x float>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4f32(
+ <vscale x 4 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8f32(
+ <vscale x 8 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8f32(
+ <vscale x 8 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8f32(
+ <vscale x 8 x float>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8f32(
+ <vscale x 8 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16f32(
+ <vscale x 16 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16f32(
+ <vscale x 16 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16f32(
+ <vscale x 16 x float>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16f32(
+ <vscale x 16 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1i16(
+ <vscale x 1 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1i16(
+ <vscale x 1 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1i16(
+ <vscale x 1 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i16(
+ <vscale x 1 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2i16(
+ <vscale x 2 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2i16(
+ <vscale x 2 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2i16(
+ <vscale x 2 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2i16(
+ <vscale x 2 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4i16(
+ <vscale x 4 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4i16(
+ <vscale x 4 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4i16(
+ <vscale x 4 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4i16(
+ <vscale x 4 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8i16(
+ <vscale x 8 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8i16(
+ <vscale x 8 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8i16(
+ <vscale x 8 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8i16(
+ <vscale x 8 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16i16(
+ <vscale x 16 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16i16(
+ <vscale x 16 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16i16(
+ <vscale x 16 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16i16(
+ <vscale x 16 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv32i16(
+ <vscale x 32 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv32i16(
+ <vscale x 32 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv32i16(
+ <vscale x 32 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv32i16(
+ <vscale x 32 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1f16(
+ <vscale x 1 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1f16(
+ <vscale x 1 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1f16(
+ <vscale x 1 x half>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1f16(
+ <vscale x 1 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2f16(
+ <vscale x 2 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2f16(
+ <vscale x 2 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2f16(
+ <vscale x 2 x half>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2f16(
+ <vscale x 2 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4f16(
+ <vscale x 4 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4f16(
+ <vscale x 4 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4f16(
+ <vscale x 4 x half>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4f16(
+ <vscale x 4 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8f16(
+ <vscale x 8 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8f16(
+ <vscale x 8 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8f16(
+ <vscale x 8 x half>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8f16(
+ <vscale x 8 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16f16(
+ <vscale x 16 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16f16(
+ <vscale x 16 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16f16(
+ <vscale x 16 x half>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16f16(
+ <vscale x 16 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv32f16(
+ <vscale x 32 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv32f16(
+ <vscale x 32 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv32f16(
+ <vscale x 32 x half>,
+ ptr,
+ iXLen,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv32f16(
+ <vscale x 32 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1i8(
+ <vscale x 1 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1i8(
+ <vscale x 1 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1i8(
+ <vscale x 1 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i8(
+ <vscale x 1 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2i8(
+ <vscale x 2 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2i8(
+ <vscale x 2 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2i8(
+ <vscale x 2 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2i8(
+ <vscale x 2 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4i8(
+ <vscale x 4 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4i8(
+ <vscale x 4 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4i8(
+ <vscale x 4 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4i8(
+ <vscale x 4 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8i8(
+ <vscale x 8 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8i8(
+ <vscale x 8 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8i8(
+ <vscale x 8 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8i8(
+ <vscale x 8 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16i8(
+ <vscale x 16 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16i8(
+ <vscale x 16 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16i8(
+ <vscale x 16 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16i8(
+ <vscale x 16 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv32i8(
+ <vscale x 32 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv32i8(
+ <vscale x 32 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv32i8(
+ <vscale x 32 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv32i8(
+ <vscale x 32 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv64i8(
+ <vscale x 64 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv64i8(
+ <vscale x 64 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv64i8(
+ <vscale x 64 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 64 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv64i8(
+ <vscale x 64 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 64 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 37e11db..988d049 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -136,6 +136,7 @@
; CHECK-NEXT: shgatpa - 'Shgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare).
; CHECK-NEXT: shifted-zextw-fusion - Enable SLLI+SRLI to be fused when computing (shifted) word zero extension.
; CHECK-NEXT: shlcofideleg - 'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode).
+; CHECK-NEXT: short-forward-branch-i-minmax - Enable short forward branch optimization for min,max instructions in Zbb.
; CHECK-NEXT: short-forward-branch-opt - Enable short forward branch optimization.
; CHECK-NEXT: shtvala - 'Shtvala' (htval provides all needed values).
; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp).
diff --git a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll
index d07f608..c50a0fb3 100644
--- a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll
@@ -7,11 +7,11 @@
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .half 0
; Num Functions
-; CHECK-NEXT: .word 12
+; CHECK-NEXT: .word 13
; Num LargeConstants
-; CHECK-NEXT: .word 2
+; CHECK-NEXT: .word 3
; Num Callsites
-; CHECK-NEXT: .word 16
+; CHECK-NEXT: .word 17
; Functions and stack size
; CHECK-NEXT: .quad constantargs
@@ -38,8 +38,8 @@
; CHECK-NEXT: .quad liveConstant
; CHECK-NEXT: .quad 0
; CHECK-NEXT: .quad 1
-; CHECK-NEXT: .quad spilledValue
-; CHECK-NEXT: .quad 144
+; CHECK-NEXT: .quad liveArgs
+; CHECK-NEXT: .quad 0
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad directFrameIdx
; CHECK-NEXT: .quad 48
@@ -50,10 +50,14 @@
; CHECK-NEXT: .quad needsStackRealignment
; CHECK-NEXT: .quad -1
; CHECK-NEXT: .quad 1
+; CHECK-NEXT: .quad floats
+; CHECK-NEXT: .quad 32
+; CHECK-NEXT: .quad 1
; Num LargeConstants
; CHECK-NEXT: .quad 4294967295
; CHECK-NEXT: .quad 4294967296
+; CHECK-NEXT: .quad 4609434218613702656
; Constant arguments
;
@@ -278,7 +282,7 @@ define void @liveConstant() {
;
; Verify 28 stack map entries.
;
-; CHECK-LABEL: .word .L{{.*}}-spilledValue
+; CHECK-LABEL: .word .L{{.*}}-liveArgs
; CHECK-NEXT: .half 0
; CHECK-NEXT: .half 28
;
@@ -290,9 +294,9 @@ define void @liveConstant() {
; CHECK-NEXT: .half 2
; CHECK-NEXT: .half 0
; CHECK-NEXT: .word
-define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
+define void @liveArgs(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 zeroext %l26, i32 signext %l27) {
entry:
- call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
+ call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 %l26, i32 %l27)
ret void
}
@@ -379,6 +383,104 @@ define void @needsStackRealignment() {
}
declare void @escape_values(...)
+; CHECK-LABEL: .word .L{{.*}}-floats
+; CHECK-NEXT: .half 0
+; Num Locations
+; CHECK-NEXT: .half 12
+; Loc 0: constant float as constant integer
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 1: constant double as large constant integer
+; CHECK-NEXT: .byte 5
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 2: constant half as constant integer
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 3: constant bfloat as constant integer
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 4: float value in X register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 10
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 5: double value in X register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 11
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 6: half value in X register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 12
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 7: bfloat value in X register
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 13
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 8: float on stack
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 2
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 9: double on stack
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 2
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 10: half on stack
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 2
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+; Loc 11: bfloat on stack
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .half 8
+; CHECK-NEXT: .half 2
+; CHECK-NEXT: .half 0
+; CHECK-NEXT: .word
+define void @floats(float %f, double %g, half %h, bfloat %i) {
+ %ff = alloca float
+ %gg = alloca double
+ %hh = alloca half
+ %ii = alloca bfloat
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, float 1.25,
+ double 1.5, half 1.5, bfloat 1.5, float %f, double %g, half %h, bfloat %i, ptr %ff, ptr %gg, ptr %hh, ptr %ii)
+ ret void
+}
+
declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, ptr, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, ptr, i32, ...)
diff --git a/llvm/test/CodeGen/RISCV/rv64p.ll b/llvm/test/CodeGen/RISCV/rv64p.ll
index cb07f94..f937f44 100644
--- a/llvm/test/CodeGen/RISCV/rv64p.ll
+++ b/llvm/test/CodeGen/RISCV/rv64p.ll
@@ -297,8 +297,7 @@ declare i32 @llvm.abs.i32(i32, i1 immarg)
define i32 @abs_i32(i32 %x) {
; CHECK-LABEL: abs_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a0, a0
-; CHECK-NEXT: abs a0, a0
+; CHECK-NEXT: absw a0, a0
; CHECK-NEXT: ret
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
ret i32 %abs
@@ -307,8 +306,7 @@ define i32 @abs_i32(i32 %x) {
define signext i32 @abs_i32_sext(i32 signext %x) {
; CHECK-LABEL: abs_i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: abs a0, a0
-; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: absw a0, a0
; CHECK-NEXT: ret
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
ret i32 %abs
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 4c35b25..7e6f2c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -15265,6 +15265,259 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
ret <4 x i32> %x
}
+define <7 x i8> @mgather_baseidx_v7i8(ptr %base, <7 x i8> %idxs, <7 x i1> %m, <7 x i8> %passthru) {
+; RV32-LABEL: mgather_baseidx_v7i8:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 127
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v10, a1
+; RV32-NEXT: vmand.mm v0, v0, v10
+; RV32-NEXT: vsext.vf4 v10, v8
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_baseidx_v7i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: li a1, 127
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vmv.s.x v10, a1
+; RV64V-NEXT: vmand.mm v0, v0, v10
+; RV64V-NEXT: vsext.vf8 v12, v8
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: ret
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_v7i8:
+; RV64ZVE32F: # %bb.0:
+; RV64ZVE32F-NEXT: addi sp, sp, -16
+; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16
+; RV64ZVE32F-NEXT: .cfi_remember_state
+; RV64ZVE32F-NEXT: li a1, 64
+; RV64ZVE32F-NEXT: addi a2, sp, 8
+; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64ZVE32F-NEXT: vsm.v v0, (a2)
+; RV64ZVE32F-NEXT: ld a1, 8(sp)
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_2
+; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_2: # %else
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_4
+; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.v.x v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_4: # %else2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 3
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v12
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 4
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v12
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v11, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_6: # %else5
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 5
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_8: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: bnez a2, .LBB132_13
+; RV64ZVE32F-NEXT: # %bb.9: # %else11
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB132_14
+; RV64ZVE32F-NEXT: .LBB132_10: # %else14
+; RV64ZVE32F-NEXT: andi a1, a1, 64
+; RV64ZVE32F-NEXT: beqz a1, .LBB132_12
+; RV64ZVE32F-NEXT: .LBB132_11: # %cond.load16
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: add a0, a0, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 5
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: lbu a0, 0(a0)
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: .LBB132_12: # %else17
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-NEXT: addi sp, sp, 16
+; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0
+; RV64ZVE32F-NEXT: ret
+; RV64ZVE32F-NEXT: .LBB132_13: # %cond.load10
+; RV64ZVE32F-NEXT: .cfi_restore_state
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.v.x v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_10
+; RV64ZVE32F-NEXT: .LBB132_14: # %cond.load13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: andi a1, a1, 64
+; RV64ZVE32F-NEXT: bnez a1, .LBB132_11
+; RV64ZVE32F-NEXT: j .LBB132_12
+ %ptrs = getelementptr inbounds i8, ptr %base, <7 x i8> %idxs
+ %v = call <7 x i8> @llvm.masked.gather.v7i8.v7p0(<7 x ptr> %ptrs, i32 1, <7 x i1> %m, <7 x i8> %passthru)
+ ret <7 x i8> %v
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32V-ZVFH: {{.*}}
; RV32V-ZVFHMIN: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr165232.ll b/llvm/test/CodeGen/RISCV/rvv/pr165232.ll
new file mode 100644
index 0000000..bef53c6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr165232.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+define i1 @main(ptr %var_117, ptr %arrayinit.element3045, ptr %arrayinit.element3047, ptr %arrayinit.element3049, ptr %arrayinit.element3051, ptr %arrayinit.element3053, ptr %arrayinit.element3055, ptr %arrayinit.element3057, ptr %arrayinit.element3059, ptr %arrayinit.element3061, ptr %arrayinit.element3063, ptr %arrayinit.element3065, ptr %arrayinit.element3067, i64 %var_94_i.07698, target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %1) {
+; CHECK-LABEL: main:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr t0, vlenb
+; CHECK-NEXT: slli t0, t0, 3
+; CHECK-NEXT: mv t1, t0
+; CHECK-NEXT: slli t0, t0, 1
+; CHECK-NEXT: add t0, t0, t1
+; CHECK-NEXT: sub sp, sp, t0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd a2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: vs4r.v v16, (a1) # vscale x 32-byte Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: ld t0, 56(a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: ld t1, 48(a1)
+; CHECK-NEXT: vsetvli t2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: ld t2, 40(a1)
+; CHECK-NEXT: # kill: def $v10 killed $v9 killed $vtype
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: ld t3, 32(a1)
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: ld t4, 16(a1)
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: ld t5, 24(a1)
+; CHECK-NEXT: vmv.v.i v13, 0
+; CHECK-NEXT: vsetvli t6, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v22, 0
+; CHECK-NEXT: vmv1r.v v14, v9
+; CHECK-NEXT: sd zero, 0(a0)
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vmv1r.v v15, v9
+; CHECK-NEXT: vmv1r.v v18, v9
+; CHECK-NEXT: li t6, 1023
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vmv1r.v v19, v9
+; CHECK-NEXT: slli t6, t6, 52
+; CHECK-NEXT: vmv.v.i v28, 0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs2r.v v22, (a1) # vscale x 16-byte Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: vs4r.v v24, (a1) # vscale x 32-byte Folded Spill
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: ld a2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: vs2r.v v28, (a1) # vscale x 16-byte Folded Spill
+; CHECK-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: vmv1r.v v20, v9
+; CHECK-NEXT: sd t6, 0(t5)
+; CHECK-NEXT: vmv2r.v v16, v14
+; CHECK-NEXT: vmv2r.v v14, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv1r.v v11, v9
+; CHECK-NEXT: vmv1r.v v21, v9
+; CHECK-NEXT: csrr t5, vlenb
+; CHECK-NEXT: slli t5, t5, 3
+; CHECK-NEXT: add t5, sp, t5
+; CHECK-NEXT: addi t5, t5, 16
+; CHECK-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill
+; CHECK-NEXT: csrr t6, vlenb
+; CHECK-NEXT: slli t6, t6, 1
+; CHECK-NEXT: add t5, t5, t6
+; CHECK-NEXT: vs2r.v v20, (t5) # vscale x 16-byte Folded Spill
+; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v19, 0
+; CHECK-NEXT: vmclr.m v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v6, 0
+; CHECK-NEXT: .LBB0_1: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv1r.v v20, v19
+; CHECK-NEXT: vmv1r.v v3, v19
+; CHECK-NEXT: vmv1r.v v5, v19
+; CHECK-NEXT: vmv1r.v v2, v19
+; CHECK-NEXT: vmv1r.v v31, v19
+; CHECK-NEXT: vmv1r.v v30, v19
+; CHECK-NEXT: vmv1r.v v4, v19
+; CHECK-NEXT: vmv2r.v v22, v10
+; CHECK-NEXT: vmv4r.v v24, v12
+; CHECK-NEXT: vmv2r.v v28, v16
+; CHECK-NEXT: vmv2r.v v8, v6
+; CHECK-NEXT: vmv1r.v v18, v19
+; CHECK-NEXT: vmv1r.v v21, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT: vle32.v v20, (t4)
+; CHECK-NEXT: vle32.v v3, (t1)
+; CHECK-NEXT: vle32.v v30, (a7)
+; CHECK-NEXT: vle64.v v8, (a4)
+; CHECK-NEXT: vle32.v v5, (t2)
+; CHECK-NEXT: vle32.v v2, (t3)
+; CHECK-NEXT: vle32.v v31, (a6)
+; CHECK-NEXT: vmv1r.v v24, v30
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT: vmflt.vv v21, v8, v6, v0.t
+; CHECK-NEXT: vmv1r.v v8, v19
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT: vle32.v v18, (a2)
+; CHECK-NEXT: vle32.v v8, (a3)
+; CHECK-NEXT: vle32.v v4, (a5)
+; CHECK-NEXT: vmv1r.v v22, v20
+; CHECK-NEXT: csrr t5, vlenb
+; CHECK-NEXT: slli t5, t5, 3
+; CHECK-NEXT: add t5, sp, t5
+; CHECK-NEXT: addi t5, t5, 16
+; CHECK-NEXT: vl1r.v v1, (t5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: csrr t6, vlenb
+; CHECK-NEXT: add t5, t5, t6
+; CHECK-NEXT: vl2r.v v2, (t5) # vscale x 16-byte Folded Reload
+; CHECK-NEXT: slli t6, t6, 1
+; CHECK-NEXT: add t5, t5, t6
+; CHECK-NEXT: vl1r.v v4, (t5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: vsseg4e32.v v1, (zero)
+; CHECK-NEXT: vsseg8e32.v v22, (a1)
+; CHECK-NEXT: vmv1r.v v0, v21
+; CHECK-NEXT: vssub.vv v8, v19, v18, v0.t
+; CHECK-NEXT: csrr t5, vlenb
+; CHECK-NEXT: slli t5, t5, 2
+; CHECK-NEXT: mv t6, t5
+; CHECK-NEXT: slli t5, t5, 1
+; CHECK-NEXT: add t5, t5, t6
+; CHECK-NEXT: add t5, sp, t5
+; CHECK-NEXT: addi t5, t5, 16
+; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT: vsetvli zero, t0, e64, m2, ta, ma
+; CHECK-NEXT: vsseg2e64.v v20, (zero)
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: addi t5, sp, 16
+; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT: csrr t6, vlenb
+; CHECK-NEXT: slli t6, t6, 2
+; CHECK-NEXT: add t5, t5, t6
+; CHECK-NEXT: vl4r.v v24, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT: vsetivli zero, 0, e64, m2, ta, ma
+; CHECK-NEXT: vsseg4e64.v v20, (zero), v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsseg8e32.v v8, (a0)
+; CHECK-NEXT: csrr t5, vlenb
+; CHECK-NEXT: slli t5, t5, 4
+; CHECK-NEXT: add t5, sp, t5
+; CHECK-NEXT: addi t5, t5, 16
+; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT: csrr t6, vlenb
+; CHECK-NEXT: slli t6, t6, 2
+; CHECK-NEXT: add t5, t5, t6
+; CHECK-NEXT: vl4r.v v24, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsseg4e64.v v20, (zero)
+; CHECK-NEXT: j .LBB0_1
+entry:
+ store double 0.000000e+00, ptr %var_117, align 8
+ store double 1.000000e+00, ptr %arrayinit.element3061, align 8
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %2 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3059, i64 0)
+ %3 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3067, i64 0)
+ %4 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3065, i64 0)
+ %5 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3063, i64 0)
+ %6 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3055, i64 0)
+ %7 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3057, i64 0)
+ %8 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3053, i64 0)
+ %9 = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64.p0.i64(<vscale x 2 x double> zeroinitializer, ptr %arrayinit.element3051, i64 0)
+ %10 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.p0.i64(<vscale x 2 x i32> zeroinitializer, ptr %arrayinit.element3047, i64 0)
+ %11 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.p0.i64(<vscale x 2 x i32> zeroinitializer, ptr %arrayinit.element3049, i64 0)
+ call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) zeroinitializer, ptr null, i64 0, i64 5)
+ %12 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) zeroinitializer, <vscale x 2 x float> %8, i32 0)
+ %13 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %12, <vscale x 2 x float> %7, i32 2)
+ %14 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %13, <vscale x 2 x float> %6, i32 0)
+ %15 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %14, <vscale x 2 x float> %5, i32 0)
+ %16 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %15, <vscale x 2 x float> %4, i32 0)
+ %17 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %16, <vscale x 2 x float> %3, i32 0)
+ %18 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %17, <vscale x 2 x float> %2, i32 0)
+ call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %18, ptr %arrayinit.element3045, i64 0, i64 5)
+ %19 = tail call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.nxv2f64.i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %9, <vscale x 2 x i1> zeroinitializer, i64 0)
+ %20 = tail call <vscale x 2 x i32> @llvm.riscv.vssub.mask.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> %11, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %10, <vscale x 2 x i1> %19, i64 0, i64 0)
+ call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv16i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, ptr null, i64 %var_94_i.07698, i64 6)
+ call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv2i1.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) zeroinitializer, ptr null, <vscale x 2 x i1> zeroinitializer, i64 0, i64 6)
+ %21 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) poison, <vscale x 2 x i32> %20, i32 0)
+ call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %21, ptr %var_117, i64 0, i64 5)
+ call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv16i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %1, ptr null, i64 0, i64 6)
+ br label %for.body
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index dd9960d..9c2fa9d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -32,10 +32,10 @@ body: |
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: VS4R_V $v0m4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s256>) into %stack.0, align 8)
; CHECK-NEXT: $x12 = PseudoReadVLENB
- ; CHECK-NEXT: $x13 = SLLI $x12, 2
- ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
+ ; CHECK-NEXT: $x12 = SLLI killed $x12, 2
+ ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
; CHECK-NEXT: VS2R_V $v4m2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
- ; CHECK-NEXT: $x12 = SLLI killed $x12, 1
+ ; CHECK-NEXT: $x12 = SRLI killed $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
; CHECK-NEXT: VS1R_V $v6, killed $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
; CHECK-NEXT: $x11 = ADDI $x2, 16
@@ -93,10 +93,10 @@ body: |
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: $v10m2 = VL2RE8_V $x11 :: (load (<vscale x 1 x s128>) from %stack.0, align 8)
; CHECK-NEXT: $x12 = PseudoReadVLENB
- ; CHECK-NEXT: $x13 = SLLI $x12, 1
- ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
+ ; CHECK-NEXT: $x12 = SLLI killed $x12, 1
+ ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
; CHECK-NEXT: $v12m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
- ; CHECK-NEXT: $x12 = SLLI killed $x12, 2
+ ; CHECK-NEXT: $x12 = SLLI killed $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
; CHECK-NEXT: $v16 = VL1RE8_V killed $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
; CHECK-NEXT: VS1R_V killed $v10, killed renamable $x10
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll
new file mode 100644
index 0000000..05e06cea
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll
@@ -0,0 +1,703 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb | FileCheck %s --check-prefixes=RV32I-ZBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | FileCheck %s --check-prefixes=RV64I-ZBB
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB-ZBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB-ZBB
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-i-minmax | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBIMinMax-ZBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-i-minmax | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBIMinMax-ZBB
+
+define i32 @select_example_smax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
+; RV32I-ZBB-LABEL: select_example_smax:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beqz a2, .LBB0_2
+; RV32I-ZBB-NEXT: # %bb.1:
+; RV32I-ZBB-NEXT: max a1, a0, a3
+; RV32I-ZBB-NEXT: .LBB0_2: # %entry
+; RV32I-ZBB-NEXT: mv a0, a1
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_smax:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB0_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: sext.w a3, a3
+; RV64I-ZBB-NEXT: sext.w a0, a0
+; RV64I-ZBB-NEXT: max a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB0_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_smax:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: max a0, a0, a3
+; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB0_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a1
+; RV32I-SFB-ZBB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_smax:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFB-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFB-ZBB-NEXT: max a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB0_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smax:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB0_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB0_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smax:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB0_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB0_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i32 @llvm.smax.i32(i32 %a, i32 %y)
+ %sel = select i1 %x, i32 %res, i32 %b
+ ret i32 %sel
+}
+
+define i32 @select_example_smin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
+; RV32I-ZBB-LABEL: select_example_smin:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beqz a2, .LBB1_2
+; RV32I-ZBB-NEXT: # %bb.1:
+; RV32I-ZBB-NEXT: min a1, a0, a3
+; RV32I-ZBB-NEXT: .LBB1_2: # %entry
+; RV32I-ZBB-NEXT: mv a0, a1
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_smin:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB1_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: sext.w a3, a3
+; RV64I-ZBB-NEXT: sext.w a0, a0
+; RV64I-ZBB-NEXT: min a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB1_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_smin:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: min a0, a0, a3
+; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB1_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a1
+; RV32I-SFB-ZBB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_smin:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFB-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFB-ZBB-NEXT: min a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB1_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smin:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB1_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB1_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smin:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB1_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB1_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i32 @llvm.smin.i32(i32 %a, i32 %y)
+ %sel = select i1 %x, i32 %res, i32 %b
+ ret i32 %sel
+}
+
+define i32 @select_example_umax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
+; RV32I-ZBB-LABEL: select_example_umax:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beqz a2, .LBB2_2
+; RV32I-ZBB-NEXT: # %bb.1:
+; RV32I-ZBB-NEXT: maxu a1, a0, a3
+; RV32I-ZBB-NEXT: .LBB2_2: # %entry
+; RV32I-ZBB-NEXT: mv a0, a1
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_umax:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB2_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: sext.w a3, a3
+; RV64I-ZBB-NEXT: sext.w a0, a0
+; RV64I-ZBB-NEXT: maxu a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB2_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_umax:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: maxu a0, a0, a3
+; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB2_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a1
+; RV32I-SFB-ZBB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_umax:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFB-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFB-ZBB-NEXT: maxu a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB2_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umax:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB2_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB2_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umax:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB2_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB2_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i32 @llvm.umax.i32(i32 %a, i32 %y)
+ %sel = select i1 %x, i32 %res, i32 %b
+ ret i32 %sel
+}
+
+define i32 @select_example_umin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
+; RV32I-ZBB-LABEL: select_example_umin:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beqz a2, .LBB3_2
+; RV32I-ZBB-NEXT: # %bb.1:
+; RV32I-ZBB-NEXT: minu a1, a0, a3
+; RV32I-ZBB-NEXT: .LBB3_2: # %entry
+; RV32I-ZBB-NEXT: mv a0, a1
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_umin:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB3_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: sext.w a3, a3
+; RV64I-ZBB-NEXT: sext.w a0, a0
+; RV64I-ZBB-NEXT: minu a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB3_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_umin:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: minu a0, a0, a3
+; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB3_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a1
+; RV32I-SFB-ZBB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_umin:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFB-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFB-ZBB-NEXT: minu a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB3_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umin:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB3_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB3_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umin:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB3_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB3_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i32 @llvm.umin.i32(i32 %a, i32 %y)
+ %sel = select i1 %x, i32 %res, i32 %b
+ ret i32 %sel
+}
+
+define i64 @select_example_smax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
+; RV32I-ZBB-LABEL: select_example_smax_1:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beq a1, a6, .LBB4_2
+; RV32I-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-ZBB-NEXT: slt a7, a6, a1
+; RV32I-ZBB-NEXT: beqz a7, .LBB4_3
+; RV32I-ZBB-NEXT: j .LBB4_4
+; RV32I-ZBB-NEXT: .LBB4_2:
+; RV32I-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-ZBB-NEXT: bnez a7, .LBB4_4
+; RV32I-ZBB-NEXT: .LBB4_3: # %entry
+; RV32I-ZBB-NEXT: mv a1, a6
+; RV32I-ZBB-NEXT: mv a0, a5
+; RV32I-ZBB-NEXT: .LBB4_4: # %entry
+; RV32I-ZBB-NEXT: beqz a4, .LBB4_6
+; RV32I-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-ZBB-NEXT: ret
+; RV32I-ZBB-NEXT: .LBB4_6: # %entry
+; RV32I-ZBB-NEXT: mv a0, a2
+; RV32I-ZBB-NEXT: mv a1, a3
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_smax_1:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB4_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: max a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB4_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_smax_1:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-SFB-ZBB-NEXT: slt t0, a6, a1
+; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB4_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv t0, a7
+; RV32I-SFB-ZBB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB4_4
+; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a6
+; RV32I-SFB-ZBB-NEXT: .LBB4_4: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB4_6
+; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a5
+; RV32I-SFB-ZBB-NEXT: .LBB4_6: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB4_8
+; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a2
+; RV32I-SFB-ZBB-NEXT: .LBB4_8: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB4_10
+; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a3
+; RV32I-SFB-ZBB-NEXT: .LBB4_10: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_smax_1:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: max a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB4_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smax_1:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-SFBIMinMax-ZBB-NEXT: slt t0, a6, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB4_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB4_4
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_4: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB4_6
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_6: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB4_8
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_8: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB4_10
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_10: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smax_1:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB4_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB4_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i64 @llvm.smax.i64(i64 %a, i64 %y)
+ %sel = select i1 %x, i64 %res, i64 %b
+ ret i64 %sel
+}
+
+define i64 @select_example_smin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
+; RV32I-ZBB-LABEL: select_example_smin_1:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beq a1, a6, .LBB5_2
+; RV32I-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-ZBB-NEXT: slt a7, a1, a6
+; RV32I-ZBB-NEXT: beqz a7, .LBB5_3
+; RV32I-ZBB-NEXT: j .LBB5_4
+; RV32I-ZBB-NEXT: .LBB5_2:
+; RV32I-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-ZBB-NEXT: bnez a7, .LBB5_4
+; RV32I-ZBB-NEXT: .LBB5_3: # %entry
+; RV32I-ZBB-NEXT: mv a1, a6
+; RV32I-ZBB-NEXT: mv a0, a5
+; RV32I-ZBB-NEXT: .LBB5_4: # %entry
+; RV32I-ZBB-NEXT: beqz a4, .LBB5_6
+; RV32I-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-ZBB-NEXT: ret
+; RV32I-ZBB-NEXT: .LBB5_6: # %entry
+; RV32I-ZBB-NEXT: mv a0, a2
+; RV32I-ZBB-NEXT: mv a1, a3
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_smin_1:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB5_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: min a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB5_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_smin_1:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-SFB-ZBB-NEXT: slt t0, a1, a6
+; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB5_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv t0, a7
+; RV32I-SFB-ZBB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB5_4
+; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a6
+; RV32I-SFB-ZBB-NEXT: .LBB5_4: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB5_6
+; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a5
+; RV32I-SFB-ZBB-NEXT: .LBB5_6: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB5_8
+; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a2
+; RV32I-SFB-ZBB-NEXT: .LBB5_8: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB5_10
+; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a3
+; RV32I-SFB-ZBB-NEXT: .LBB5_10: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_smin_1:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: min a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB5_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smin_1:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: slt t0, a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB5_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB5_4
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_4: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB5_6
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_6: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB5_8
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_8: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB5_10
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_10: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smin_1:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB5_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB5_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i64 @llvm.smin.i64(i64 %a, i64 %y)
+ %sel = select i1 %x, i64 %res, i64 %b
+ ret i64 %sel
+}
+
+define i64 @select_example_umax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
+; RV32I-ZBB-LABEL: select_example_umax_1:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beq a1, a6, .LBB6_2
+; RV32I-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-ZBB-NEXT: sltu a7, a6, a1
+; RV32I-ZBB-NEXT: beqz a7, .LBB6_3
+; RV32I-ZBB-NEXT: j .LBB6_4
+; RV32I-ZBB-NEXT: .LBB6_2:
+; RV32I-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-ZBB-NEXT: bnez a7, .LBB6_4
+; RV32I-ZBB-NEXT: .LBB6_3: # %entry
+; RV32I-ZBB-NEXT: mv a1, a6
+; RV32I-ZBB-NEXT: mv a0, a5
+; RV32I-ZBB-NEXT: .LBB6_4: # %entry
+; RV32I-ZBB-NEXT: beqz a4, .LBB6_6
+; RV32I-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-ZBB-NEXT: ret
+; RV32I-ZBB-NEXT: .LBB6_6: # %entry
+; RV32I-ZBB-NEXT: mv a0, a2
+; RV32I-ZBB-NEXT: mv a1, a3
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_umax_1:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB6_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: maxu a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB6_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_umax_1:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-SFB-ZBB-NEXT: sltu t0, a6, a1
+; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB6_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv t0, a7
+; RV32I-SFB-ZBB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB6_4
+; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a6
+; RV32I-SFB-ZBB-NEXT: .LBB6_4: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB6_6
+; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a5
+; RV32I-SFB-ZBB-NEXT: .LBB6_6: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB6_8
+; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a2
+; RV32I-SFB-ZBB-NEXT: .LBB6_8: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB6_10
+; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a3
+; RV32I-SFB-ZBB-NEXT: .LBB6_10: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_umax_1:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: maxu a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB6_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umax_1:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu t0, a6, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB6_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB6_4
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_4: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB6_6
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_6: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB6_8
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_8: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB6_10
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_10: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umax_1:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB6_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB6_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i64 @llvm.umax.i64(i64 %a, i64 %y)
+ %sel = select i1 %x, i64 %res, i64 %b
+ ret i64 %sel
+}
+
+define i64 @select_example_umin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
+; RV32I-ZBB-LABEL: select_example_umin_1:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beq a1, a6, .LBB7_2
+; RV32I-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-ZBB-NEXT: sltu a7, a1, a6
+; RV32I-ZBB-NEXT: beqz a7, .LBB7_3
+; RV32I-ZBB-NEXT: j .LBB7_4
+; RV32I-ZBB-NEXT: .LBB7_2:
+; RV32I-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-ZBB-NEXT: bnez a7, .LBB7_4
+; RV32I-ZBB-NEXT: .LBB7_3: # %entry
+; RV32I-ZBB-NEXT: mv a1, a6
+; RV32I-ZBB-NEXT: mv a0, a5
+; RV32I-ZBB-NEXT: .LBB7_4: # %entry
+; RV32I-ZBB-NEXT: beqz a4, .LBB7_6
+; RV32I-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-ZBB-NEXT: ret
+; RV32I-ZBB-NEXT: .LBB7_6: # %entry
+; RV32I-ZBB-NEXT: mv a0, a2
+; RV32I-ZBB-NEXT: mv a1, a3
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_umin_1:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB7_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: minu a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB7_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_umin_1:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-SFB-ZBB-NEXT: sltu t0, a1, a6
+; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB7_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv t0, a7
+; RV32I-SFB-ZBB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB7_4
+; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a6
+; RV32I-SFB-ZBB-NEXT: .LBB7_4: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB7_6
+; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a5
+; RV32I-SFB-ZBB-NEXT: .LBB7_6: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB7_8
+; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a2
+; RV32I-SFB-ZBB-NEXT: .LBB7_8: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB7_10
+; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a3
+; RV32I-SFB-ZBB-NEXT: .LBB7_10: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_umin_1:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: minu a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB7_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umin_1:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu t0, a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB7_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB7_4
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_4: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB7_6
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_6: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB7_8
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_8: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB7_10
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_10: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umin_1:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB7_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB7_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i64 @llvm.umin.i64(i64 %a, i64 %y)
+ %sel = select i1 %x, i64 %res, i64 %b
+ ret i64 %sel
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll
index ed67344..4817e74 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/issue-146942-ptr-cast.ll
@@ -16,7 +16,6 @@
define void @case1() local_unnamed_addr {
; CHECK: %[[#BUFFER_LOAD:]] = OpLoad %[[#FLOAT4]] %{{[0-9]+}} Aligned 16
; CHECK: %[[#CAST_LOAD:]] = OpBitcast %[[#INT4]] %[[#BUFFER_LOAD]]
- ; CHECK: %[[#VEC_SHUFFLE:]] = OpVectorShuffle %[[#INT4]] %[[#CAST_LOAD]] %[[#CAST_LOAD]] 0 1 2 3
%1 = tail call target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_0t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str)
%2 = tail call target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4i32_12_1t(i32 0, i32 5, i32 1, i32 0, ptr nonnull @.str.2)
%3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_0t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 0) %1, i32 0)
@@ -29,8 +28,7 @@ define void @case1() local_unnamed_addr {
define void @case2() local_unnamed_addr {
; CHECK: %[[#BUFFER_LOAD:]] = OpLoad %[[#FLOAT4]] %{{[0-9]+}} Aligned 16
; CHECK: %[[#CAST_LOAD:]] = OpBitcast %[[#INT4]] %[[#BUFFER_LOAD]]
- ; CHECK: %[[#VEC_SHUFFLE:]] = OpVectorShuffle %[[#INT4]] %[[#CAST_LOAD]] %[[#CAST_LOAD]] 0 1 2 3
- ; CHECK: %[[#VEC_TRUNCATE:]] = OpVectorShuffle %[[#INT3]] %[[#VEC_SHUFFLE]] %[[#UNDEF_INT4]] 0 1 2
+ ; CHECK: %[[#VEC_TRUNCATE:]] = OpVectorShuffle %[[#INT3]] %[[#CAST_LOAD]] %[[#UNDEF_INT4]] 0 1 2
%1 = tail call target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_0t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str)
%2 = tail call target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v3i32_12_1t(i32 0, i32 5, i32 1, i32 0, ptr nonnull @.str.3)
%3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_0t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 0) %1, i32 0)
diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll
index 8491328..a1ec2cd 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll
@@ -26,3 +26,25 @@ entry:
store <4 x i32> %6, ptr addrspace(11) %7, align 16
ret void
}
+
+; This tests a load from a pointer that has been bitcast between vector types
+; that share the same total bit-width but have different numbers of elements.
+; It checks that legalize-pointer-casts works correctly by moving the bitcast
+; to the element that was loaded.
+
+define void @main2() local_unnamed_addr #0 {
+entry:
+; CHECK: %[[LOAD:[0-9]+]] = OpLoad %[[#v2_double]] {{.*}}
+; CHECK: %[[BITCAST1:[0-9]+]] = OpBitcast %[[#v4_uint]] %[[LOAD]]
+; CHECK: %[[BITCAST2:[0-9]+]] = OpBitcast %[[#v2_double]] %[[BITCAST1]]
+; CHECK: OpStore {{%[0-9]+}} %[[BITCAST2]] {{.*}}
+
+ %0 = tail call target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2f64_12_1t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.2)
+ %2 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2f64_12_1t(target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) %0, i32 0)
+ %3 = load <4 x i32>, ptr addrspace(11) %2
+ %4 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2f64_12_1t(target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) %0, i32 1)
+ store <4 x i32> %3, ptr addrspace(11) %4
+ ret void
+}
+
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
diff --git a/llvm/test/CodeGen/SystemZ/stackmap.ll b/llvm/test/CodeGen/SystemZ/stackmap.ll
index 05b8de7..f414ea3 100644
--- a/llvm/test/CodeGen/SystemZ/stackmap.ll
+++ b/llvm/test/CodeGen/SystemZ/stackmap.ll
@@ -84,14 +84,14 @@
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
-; CHECK-NEXT: .long 65535
+; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
-; CHECK-NEXT: .long 65535
+; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
index 9c36bae..ec257bc 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
@@ -6,77 +6,81 @@ define void @arm_min_q31(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocap
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: ldr.w r12, [r0]
; CHECK-NEXT: subs.w r9, r1, #1
; CHECK-NEXT: beq .LBB0_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: and r8, r9, #3
+; CHECK-NEXT: and r6, r9, #3
; CHECK-NEXT: subs r7, r1, #2
; CHECK-NEXT: cmp r7, #3
; CHECK-NEXT: bhs .LBB0_4
; CHECK-NEXT: @ %bb.2:
-; CHECK-NEXT: movs r6, #0
-; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: mov.w r10, #0
+; CHECK-NEXT: cbnz r6, .LBB0_7
+; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: movs r6, #0
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .LBB0_4: @ %while.body.preheader.new
; CHECK-NEXT: bic r7, r9, #3
-; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: str r6, [sp] @ 4-byte Spill
; CHECK-NEXT: subs r7, #4
+; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: mov.w r8, #0
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
-; CHECK-NEXT: movs r6, #0
-; CHECK-NEXT: movs r7, #4
; CHECK-NEXT: .LBB0_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r10, [r0, #16]!
-; CHECK-NEXT: sub.w r9, r9, #4
-; CHECK-NEXT: ldrd r5, r4, [r0, #-12]
-; CHECK-NEXT: ldr r11, [r0, #-4]
+; CHECK-NEXT: ldr r11, [r0, #16]!
+; CHECK-NEXT: ldrd r5, r7, [r0, #-12]
+; CHECK-NEXT: ldr r4, [r0, #-4]
; CHECK-NEXT: cmp r12, r5
-; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #3
; CHECK-NEXT: csel r5, r5, r12, gt
-; CHECK-NEXT: cmp r5, r4
+; CHECK-NEXT: csinc r6, r10, r8, le
+; CHECK-NEXT: cmp r5, r7
; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #2
-; CHECK-NEXT: csel r5, r4, r5, gt
-; CHECK-NEXT: cmp r5, r11
+; CHECK-NEXT: addgt.w r6, r8, #2
+; CHECK-NEXT: csel r7, r7, r5, gt
+; CHECK-NEXT: cmp r7, r4
; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #1
-; CHECK-NEXT: csel r5, r11, r5, gt
-; CHECK-NEXT: cmp r5, r10
-; CHECK-NEXT: csel r6, r7, r6, gt
-; CHECK-NEXT: add.w r7, r7, #4
-; CHECK-NEXT: csel r12, r10, r5, gt
+; CHECK-NEXT: addgt.w r6, r8, #3
+; CHECK-NEXT: csel r7, r4, r7, gt
+; CHECK-NEXT: add.w r8, r8, #4
+; CHECK-NEXT: cmp r7, r11
+; CHECK-NEXT: csel r10, r8, r6, gt
+; CHECK-NEXT: csel r12, r11, r7, gt
; CHECK-NEXT: le lr, .LBB0_5
-; CHECK-NEXT: .LBB0_6: @ %while.end.loopexit.unr-lcssa
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: beq .LBB0_10
-; CHECK-NEXT: @ %bb.7: @ %while.body.epil
+; CHECK-NEXT: @ %bb.6: @ %while.end.loopexit.unr-lcssa.loopexit
+; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload
+; CHECK-NEXT: sub.w r9, r9, r8
+; CHECK-NEXT: cbz r6, .LBB0_10
+; CHECK-NEXT: .LBB0_7: @ %while.body.epil
; CHECK-NEXT: ldr r7, [r0, #4]
; CHECK-NEXT: sub.w r1, r1, r9
; CHECK-NEXT: cmp r12, r7
-; CHECK-NEXT: csel r6, r1, r6, gt
+; CHECK-NEXT: csel r10, r1, r10, gt
; CHECK-NEXT: csel r12, r7, r12, gt
-; CHECK-NEXT: cmp.w r8, #1
+; CHECK-NEXT: cmp r6, #1
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.8: @ %while.body.epil.1
; CHECK-NEXT: ldr r7, [r0, #8]
; CHECK-NEXT: cmp r12, r7
-; CHECK-NEXT: csinc r6, r6, r1, le
+; CHECK-NEXT: csinc r10, r10, r1, le
; CHECK-NEXT: csel r12, r7, r12, gt
-; CHECK-NEXT: cmp.w r8, #2
+; CHECK-NEXT: cmp r6, #2
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.9: @ %while.body.epil.2
; CHECK-NEXT: ldr r0, [r0, #12]
; CHECK-NEXT: cmp r12, r0
; CHECK-NEXT: it gt
-; CHECK-NEXT: addgt r6, r1, #2
+; CHECK-NEXT: addgt.w r10, r1, #2
; CHECK-NEXT: csel r12, r0, r12, gt
; CHECK-NEXT: .LBB0_10: @ %while.end
; CHECK-NEXT: str.w r12, [r2]
-; CHECK-NEXT: str r6, [r3]
+; CHECK-NEXT: str.w r10, [r3]
+; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
%0 = load i32, ptr %pSrc, align 4
diff --git a/llvm/test/CodeGen/X86/amx-tf32-internal.ll b/llvm/test/CodeGen/X86/amx-tf32-internal.ll
index 6d0f3c5..caf7a1c 100644
--- a/llvm/test/CodeGen/X86/amx-tf32-internal.ll
+++ b/llvm/test/CodeGen/X86/amx-tf32-internal.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+avx512f, \
-; RUN: -mattr=+amx-tf32,+amx-transpose -verify-machineinstrs | FileCheck %s
+; RUN: -mattr=+amx-tf32 -verify-machineinstrs | FileCheck %s
define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
; CHECK-LABEL: test_amx:
@@ -20,7 +20,6 @@ define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
; CHECK-NEXT: tilezero %tmm1
; CHECK-NEXT: tilezero %tmm2
; CHECK-NEXT: tmmultf32ps %tmm1, %tmm0, %tmm2
-; CHECK-NEXT: ttmmultf32ps %tmm1, %tmm0, %tmm2
; CHECK-NEXT: tilestored %tmm2, (%rdi,%rdx)
; CHECK-NEXT: tilerelease
; CHECK-NEXT: vzeroupper
@@ -31,9 +30,8 @@ define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
%c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
%c1 = call x86_amx @llvm.x86.tmmultf32ps.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b)
- %c2 = call x86_amx @llvm.x86.ttmmultf32ps.internal(i16 8, i16 8, i16 8, x86_amx %c1, x86_amx %a, x86_amx %b)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %c2)
+ call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %c1)
ret void
}
@@ -43,4 +41,3 @@ declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
declare x86_amx @llvm.x86.tmmultf32ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-declare x86_amx @llvm.x86.ttmmultf32ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
diff --git a/llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll b/llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll
index af1a7ae..642c1b7 100644
--- a/llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-tf32,+amx-transpose -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-tf32 -verify-machineinstrs | FileCheck %s
define void @test_tmmultf32ps() {
; CHECK-LABEL: test_tmmultf32ps:
@@ -11,13 +11,3 @@ define void @test_tmmultf32ps() {
}
declare void @llvm.x86.tmmultf32ps(i8 %A, i8 %B, i8 %C)
-define void @test_ttmmultf32ps() {
-; CHECK-LABEL: test_ttmmultf32ps:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ttmmultf32ps %tmm3, %tmm2, %tmm1
-; CHECK-NEXT: retq
- call void @llvm.x86.ttmmultf32ps(i8 1, i8 2, i8 3)
- ret void
-}
-declare void @llvm.x86.ttmmultf32ps(i8 %A, i8 %B, i8 %C)
-
diff --git a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
deleted file mode 100755
index 1f5758c..0000000
--- a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
+++ /dev/null
@@ -1,122 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0
-; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2
-; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
-
-define void @test_amx(i64 %stride, i8* %addr1) #0 {
-; CHECK-LABEL: test_amx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0
-; CHECK-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2
-; CHECK-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0
-; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2
-; CHECK-NEXT: retq
-;
-; EGPR-LABEL: test_amx:
-; EGPR: # %bb.0:
-; EGPR-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e]
-; EGPR-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e]
-; EGPR-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e]
-; EGPR-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e]
-; EGPR-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride)
- call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride)
- call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride)
- call void @llvm.x86.t2rpntlvwz1rst1(i8 2, i8* %addr1, i64 %stride)
- ret void
-}
-declare void @llvm.x86.t2rpntlvwz0rs(i8 , i8* , i64 )
-declare void @llvm.x86.t2rpntlvwz0rst1(i8 , i8* , i64 )
-declare void @llvm.x86.t2rpntlvwz1rs(i8 , i8* , i64 )
-declare void @llvm.x86.t2rpntlvwz1rst1(i8 , i8* , i64 )
-
-define void @test_amx2(i8* %base, i64 %stride) #0 {
-; O0-LABEL: test_amx2:
-; O0: # %bb.0:
-; O0-NEXT: xorps %xmm0, %xmm0
-; O0-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movb $1, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movw $8, %ax
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; O0-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; O0-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4
-; O0-NEXT: movw $8, %ax
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; O0-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; O0-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4
-; O0-NEXT: movw $8, %ax
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; O0-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; O0-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4
-; O0-NEXT: tilerelease
-; O0-NEXT: retq
-;
-; O2-LABEL: test_amx2:
-; O2: # %bb.0:
-; O2-NEXT: xorps %xmm0, %xmm0
-; O2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
-; O2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
-; O2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
-; O2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
-; O2-NEXT: movb $1, -{{[0-9]+}}(%rsp)
-; O2-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; O2-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; O2-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; O2-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; O2-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; O2-NEXT: movw $8, %ax
-; O2-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4
-; O2-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4
-; O2-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4
-; O2-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4
-; O2-NEXT: tilerelease
-; O2-NEXT: retq
-;
-; EGPR-LABEL: test_amx2:
-; EGPR: # %bb.0:
-; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
-; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0]
-; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0]
-; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0]
-; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0]
-; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00]
-; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
-; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
-; EGPR-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37]
-; EGPR-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37]
-; EGPR-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37]
-; EGPR-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37]
-; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; EGPR-NEXT: retq # encoding: [0xc3]
- call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- ret void
-}
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16, i16, i16, i8*, i64)
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16, i16, i16, i8*, i64)
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16, i16, i16, i8*, i64)
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal(i16, i16, i16, i8*, i64)
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll b/llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll
deleted file mode 100644
index 4f41410..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll
+++ /dev/null
@@ -1,136 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \
-; RUN: -mattr=+amx-transpose -verify-machineinstrs | FileCheck %s
-
-@buf = dso_local global [2048 x i8] zeroinitializer, align 16
-@buf2 = dso_local global [2048 x i8] zeroinitializer, align 16
-
-define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1) local_unnamed_addr #0 {
-; CHECK-LABEL: test_tile_2rpntlvwz0:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset %rbp, -16
-; CHECK-NEXT: movq %rsp, %rbp
-; CHECK-NEXT: .cfi_def_cfa_register %rbp
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00
-; CHECK-NEXT: subq $8192, %rsp # imm = 0x2000
-; CHECK-NEXT: .cfi_offset %rbx, -24
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
-; CHECK-NEXT: movw %si, %cx
-; CHECK-NEXT: movw %di, %ax
-; CHECK-NEXT: # implicit-def: $al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: # implicit-def: $al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: # implicit-def: $al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %cx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: # implicit-def: $cl
-; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: # implicit-def: $al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %cx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: # implicit-def: $al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %cx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: # implicit-def: $al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %cx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: # implicit-def: $al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movl $buf, %esi
-; CHECK-NEXT: movl $32, %edi
-; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdi), %tmm4
-; CHECK-NEXT: movabsq $64, %rbx
-; CHECK-NEXT: tilestored %tmm5, (%rsp,%rbx) # 1024-byte Folded Spill
-; CHECK-NEXT: tileloadd (%rsp,%rbx), %tmm0 # 1024-byte Folded Reload
-; CHECK-NEXT: movabsq $64, %rbx
-; CHECK-NEXT: tilestored %tmm4, 1024(%rsp,%rbx) # 1024-byte Folded Spill
-; CHECK-NEXT: tileloadd 1024(%rsp,%rbx), %tmm1 # 1024-byte Folded Reload
-; CHECK-NEXT: movl $64, %edi
-; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: tilestored %tmm1, (%rsi,%rdi)
-; CHECK-NEXT: movl $64, %edi
-; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: tilestored %tmm0, (%rsi,%rdi)
-; CHECK-NEXT: tilezero %tmm0
-; CHECK-NEXT: movl $64, %edi
-; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: tilestored %tmm0, (%rsi,%rdi)
-; CHECK-NEXT: movl $64, %edi
-; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: tileloadd (%rsi,%rdi), %tmm1
-; CHECK-NEXT: movl $64, %edi
-; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: tileloadd (%rsi,%rdi), %tmm2
-; CHECK-NEXT: movl $64, %edi
-; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: tileloadd (%rsi,%rdi), %tmm0
-; CHECK-NEXT: tdpbssd %tmm2, %tmm1, %tmm0
-; CHECK-NEXT: movl $64, %edi
-; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: tilestored %tmm0, (%rsi,%rdi)
-; CHECK-NEXT: movl $64, %edi
-; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: tileloadd (%rsi,%rdi), %tmm0
-; CHECK-NEXT: movl $buf2, %edx
-; CHECK-NEXT: movl $32, %esi
-; CHECK-NEXT: tilestored %tmm0, (%rdx,%rsi)
-; CHECK-NEXT: leaq -8(%rbp), %rsp
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-; CHECK-NEXT: tilerelease
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-entry:
- %0 = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, ptr @buf, i64 32) #3
- %1 = extractvalue { x86_amx, x86_amx } %0, 0
- %2 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %1) #3
- %3 = extractvalue { x86_amx, x86_amx } %0, 1
- %4 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %3) #3
- %5 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #3
- %6 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %5) #3
- %7 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %6) #3
- %8 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %2) #3
- %9 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %4) #3
- %10 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col1, i16 %col0, x86_amx %7, x86_amx %8, x86_amx %9) #3
- %11 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %10) #3
- %12 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %11) #3
- tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col0, ptr @buf2, i64 32, x86_amx %12) #3
- ret void
-}
-
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, ptr, i64) #1
-
-declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #2
-
-declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #3
-
-declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #3
-
-declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #2
-
-declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) #4
-
-attributes #0 = { nounwind uwtable "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose" }
-attributes #1 = { argmemonly nofree nounwind readonly }
-attributes #2 = { nofree nosync nounwind readnone }
-attributes #3 = { nounwind }
-attributes #4 = { argmemonly nounwind writeonly }
-
-!llvm.module.flags = !{!0, !1, !2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"uwtable", i32 2}
-!2 = !{i32 7, !"frame-pointer", i32 2}
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir b/llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir
deleted file mode 100644
index ab12ab3..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir
+++ /dev/null
@@ -1,165 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \
-# RUN: -mattr=+amx-transpose -run-pass=fasttileconfig -o - %s | FileCheck %s
-
----
-name: test_tile_2rpntlvwz0
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
-hasWinCFI: false
-callsEHReturn: false
-callsUnwindInit: false
-hasEHContTarget: false
-hasEHScopes: false
-hasEHFunclets: false
-failsVerification: false
-tracksDebugUserValues: false
-registers: []
-liveins:
- - { reg: '$edi', virtual-reg: '' }
- - { reg: '$esi', virtual-reg: '' }
- - { reg: '$edx', virtual-reg: '' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 1024
- adjustsStack: false
- hasCalls: true
- stackProtector: ''
- functionContext: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- hasTailCall: false
- localFrameSize: 0
- savePoint: []
- restorePoint: []
-fixedStack: []
-stack:
- - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 1, name: '', type: default, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 2, name: '', type: default, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 3, name: '', type: default, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 4, name: '', type: default, offset: 0, size: 64, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 5, name: '', type: spill-slot, offset: 0, size: 2, alignment: 2,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 6, name: '', type: spill-slot, offset: 0, size: 2, alignment: 2,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 7, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-callSites: []
-debugValueSubstitutions: []
-constants: []
-machineFunctionInfo:
- amxProgModel: ManagedRA
-body: |
- bb.0.entry:
- liveins: $rdi, $rsi, $rdx, $rax
-
- ; CHECK-LABEL: name: test_tile_2rpntlvwz0
- ; CHECK: liveins: $rdi, $rsi, $rdx, $rax
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $zmm0 = AVX512_512_SET0
- ; CHECK-NEXT: VMOVUPSZmr %stack.4, 1, $noreg, 0, $noreg, killed renamable $zmm0 :: (store (s512) into %stack.4, align 4)
- ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4)
- ; CHECK-NEXT: renamable $rcx = MOV32ri64 64
- ; CHECK-NEXT: MOV64mr %stack.7, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.7)
- ; CHECK-NEXT: renamable $cx = MOV16ri 64
- ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 0, $noreg, $cx :: (store (s16) into %stack.5)
- ; CHECK-NEXT: renamable $cx = MOV16ri 16
- ; CHECK-NEXT: renamable $r8w = MOV16ri 16
- ; CHECK-NEXT: MOV16mr %stack.6, 1, $noreg, 0, $noreg, $r8w :: (store (s16) into %stack.6)
- ; CHECK-NEXT: $al = IMPLICIT_DEF
- ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.4 + 48, align 4)
- ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.4 + 16, align 4)
- ; CHECK-NEXT: $al = IMPLICIT_DEF
- ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 50, $noreg, $al :: (store (s512) into %stack.4 + 50, align 2, basealign 4)
- ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 20, $noreg, $cx :: (store (s512) into %stack.4 + 20, align 4)
- ; CHECK-NEXT: $al = IMPLICIT_DEF
- ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 49, $noreg, $al :: (store (s512) into %stack.4 + 49, align 1, basealign 4)
- ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 18, $noreg, $di :: (store (s512) into %stack.4 + 18, align 2, basealign 4)
- ; CHECK-NEXT: $al = IMPLICIT_DEF
- ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.4 + 48, align 4)
- ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.4 + 16, align 4)
- ; CHECK-NEXT: $al = IMPLICIT_DEF
- ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.4 + 48, align 4)
- ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.4 + 16, align 4)
- ; CHECK-NEXT: $al = IMPLICIT_DEF
- ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 52, $noreg, $al :: (store (s512) into %stack.4 + 52, align 4)
- ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 24, $noreg, $cx :: (store (s512) into %stack.4 + 24, align 4)
- ; CHECK-NEXT: $al = IMPLICIT_DEF
- ; CHECK-NEXT: MOV8mr %stack.4, 1, $noreg, 53, $noreg, $al :: (store (s512) into %stack.4 + 53, align 1, basealign 4)
- ; CHECK-NEXT: MOV16mr %stack.4, 1, $noreg, 26, $noreg, $di :: (store (s512) into %stack.4 + 26, align 2, basealign 4)
- ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.4, align 4)
- ; CHECK-NEXT: renamable $r9 = COPY $rsi
- ; CHECK-NEXT: $rsi = MOV64rm %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7)
- ; CHECK-NEXT: renamable $r8 = COPY $rdi
- ; CHECK-NEXT: $di = MOV16rm %stack.6, 1, $noreg, 0, $noreg :: (load (s16) from %stack.6)
- ; CHECK-NEXT: renamable $r10 = COPY $rax
- ; CHECK-NEXT: $ax = MOV16rm %stack.5, 1, $noreg, 0, $noreg :: (load (s16) from %stack.5)
- ; CHECK-NEXT: renamable $tmm4_tmm5 = PT2RPNTLVWZ0V renamable $ax, renamable $cx, renamable $di, renamable $rdx, 1, killed renamable $r10, 0, $noreg
- ; CHECK-NEXT: renamable $tmm0 = COPY renamable $tmm5
- ; CHECK-NEXT: renamable $tmm1 = COPY renamable $tmm4, implicit killed $tmm4_tmm5
- ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, renamable $r9, 1, renamable $rsi, 0, $noreg, killed renamable $tmm1
- ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $di, renamable $r8, 1, renamable $rsi, 0, $noreg, killed renamable $tmm0
- ; CHECK-NEXT: renamable $tmm0 = PTILEZEROV renamable $ax, renamable $cx
- ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, renamable $rdx, 1, renamable $rsi, 0, $noreg, killed renamable $tmm0
- ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $r9, 1, renamable $rsi, 0, $noreg
- ; CHECK-NEXT: renamable $tmm1 = PTILELOADDV renamable $ax, renamable $di, killed renamable $r8, 1, renamable $rsi, 0, $noreg
- ; CHECK-NEXT: renamable $tmm2 = PTILELOADDV renamable $ax, renamable $cx, renamable $rdx, 1, renamable $rsi, 0, $noreg
- ; CHECK-NEXT: renamable $tmm0 = PTDPBSSDV renamable $ax, renamable $cx, killed renamable $di, renamable $tmm0, killed renamable $tmm1, killed renamable $tmm2
- ; CHECK-NEXT: PTILESTOREDV killed renamable $ax, killed renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0
- renamable $zmm0 = AVX512_512_SET0
- VMOVUPSZmr %stack.4, 1, $noreg, 0, $noreg, killed renamable $zmm0 :: (store (s512) into %stack.4, align 4)
- MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4)
- renamable $rcx = MOV32ri64 64
- MOV64mr %stack.7, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.7)
- renamable $cx = MOV16ri 64
- MOV16mr %stack.5, 1, $noreg, 0, $noreg, $cx :: (store (s16) into %stack.5)
- renamable $cx = MOV16ri 16
- renamable $r8w = MOV16ri 16
- MOV16mr %stack.6, 1, $noreg, 0, $noreg, $r8w :: (store (s16) into %stack.6)
- PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.4, align 4)
- renamable $r9 = COPY $rsi
- $rsi = MOV64rm %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7)
- renamable $r8 = COPY $rdi
- $di = MOV16rm %stack.6, 1, $noreg, 0, $noreg :: (load (s16) from %stack.6)
- renamable $r10 = COPY $rax
- $ax = MOV16rm %stack.5, 1, $noreg, 0, $noreg :: (load (s16) from %stack.5)
- renamable $tmm4_tmm5 = PT2RPNTLVWZ0V renamable $ax, renamable $cx, renamable $di, renamable $rdx, 1, killed renamable $r10, 0, $noreg
- renamable $tmm0 = COPY renamable $tmm5
- renamable $tmm1 = COPY renamable $tmm4, implicit killed $tmm4_tmm5
- PTILESTOREDV renamable $ax, renamable $cx, renamable $r9, 1, renamable $rsi, 0, $noreg, killed renamable $tmm1
- PTILESTOREDV renamable $ax, renamable $di, renamable $r8, 1, renamable $rsi, 0, $noreg, killed renamable $tmm0
- renamable $tmm0 = PTILEZEROV renamable $ax, renamable $cx
- PTILESTOREDV renamable $ax, renamable $cx, renamable $rdx, 1, renamable $rsi, 0, $noreg, killed renamable $tmm0
- renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $r9, 1, renamable $rsi, 0, $noreg
- renamable $tmm1 = PTILELOADDV renamable $ax, renamable $di, killed renamable $r8, 1, renamable $rsi, 0, $noreg
- renamable $tmm2 = PTILELOADDV renamable $ax, renamable $cx, renamable $rdx, 1, renamable $rsi, 0, $noreg
- renamable $tmm0 = PTDPBSSDV renamable $ax, renamable $cx, killed renamable $di, renamable $tmm0, killed renamable $tmm1, killed renamable $tmm2
- PTILESTOREDV killed renamable $ax, killed renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0
-...
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir b/llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir
deleted file mode 100644
index c7d241f..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir
+++ /dev/null
@@ -1,153 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \
-# RUN: -mattr=+amx-transpose -run-pass=greedy,tileconfig -o - %s | FileCheck %s
-
---- |
- @buf = dso_local global [2048 x i8] zeroinitializer, align 16
- @buf2 = dso_local global [2048 x i8] zeroinitializer, align 16
-
- define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1) local_unnamed_addr #0 {
- entry:
- %0 = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, i8* getelementptr inbounds ([2048 x i8], [2048 x i8]* @buf, i64 0, i64 0), i64 32) #5
- %1 = extractvalue { x86_amx, x86_amx } %0, 0
- %2 = extractvalue { x86_amx, x86_amx } %0, 1
- %3 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #5
- %4 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col1, i16 %col0, x86_amx %3, x86_amx %1, x86_amx %2) #5
- tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col0, i8* getelementptr inbounds ([2048 x i8], [2048 x i8]* @buf2, i64 0, i64 0), i64 32, x86_amx %4) #5
- ret void
- }
-
- declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, i8*, i64) #1
-
- declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #2
-
- declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #3
-
- declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #3
-
- declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #2
-
- declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) #4
-
- attributes #0 = { nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="8192" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose,+avx,+avx2,+avx512f,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+amx-tile,+amx-bf16,+avx512f,+amx-transpose" "tune-cpu"="generic" }
- attributes #1 = { argmemonly nounwind readonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #2 = { nounwind readnone "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #3 = { nounwind "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #4 = { argmemonly nounwind writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #5 = { nounwind }
-
-...
----
-name: test_tile_2rpntlvwz0
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
-hasWinCFI: false
-callsEHReturn: false
-callsUnwindInit: false
-hasEHContTarget: false
-hasEHScopes: false
-hasEHFunclets: false
-failsVerification: false
-tracksDebugUserValues: false
-registers:
- - { id: 0, class: gr32, preferred-register: '' }
- - { id: 1, class: gr32, preferred-register: '' }
- - { id: 2, class: gr32, preferred-register: '' }
- - { id: 3, class: gr16, preferred-register: '' }
- - { id: 4, class: gr16, preferred-register: '' }
- - { id: 5, class: gr16, preferred-register: '' }
- - { id: 6, class: gr64, preferred-register: '' }
- - { id: 7, class: gr64_nosp, preferred-register: '' }
- - { id: 8, class: tilepair, preferred-register: '' }
- - { id: 9, class: tile, preferred-register: '' }
- - { id: 10, class: tile, preferred-register: '' }
- - { id: 11, class: tile, preferred-register: '' }
- - { id: 12, class: tile, preferred-register: '' }
- - { id: 13, class: gr64, preferred-register: '' }
- - { id: 14, class: vr512, preferred-register: '' }
-liveins:
- - { reg: '$edi', virtual-reg: '%0' }
- - { reg: '$esi', virtual-reg: '%1' }
- - { reg: '$edx', virtual-reg: '%2' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 4
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- functionContext: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- hasTailCall: false
- localFrameSize: 0
- savePoint: []
- restorePoint: []
-fixedStack: []
-stack:
- - { id: 0, name: '', type: default, offset: 0, size: 64, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-callSites: []
-debugValueSubstitutions: []
-constants: []
-machineFunctionInfo:
- amxProgModel: ManagedRA
-body: |
- bb.0.entry:
- liveins: $edi, $esi, $edx
-
-
- ; CHECK-LABEL: name: test_tile_2rpntlvwz0
- ; CHECK: liveins: $edi, $esi, $edx
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi
- ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0
- ; CHECK-NEXT: VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.0, align 4)
- ; CHECK-NEXT: MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4)
- ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 26, $noreg, [[COPY]].sub_16bit :: (store (s512) into %stack.0 + 26, align 2, basealign 4)
- ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 53, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 53, align 1, basealign 4)
- ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 24, $noreg, [[COPY1]].sub_16bit :: (store (s512) into %stack.0 + 24, align 4)
- ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 52, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 52, align 4)
- ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 16, $noreg, [[COPY]].sub_16bit :: (store (s512) into %stack.0 + 16, align 4)
- ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 48, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 48, align 4)
- ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4)
- ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf
- ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
- ; CHECK-NEXT: [[PT2RPNTLVWZ0V:%[0-9]+]]:tilepair = PT2RPNTLVWZ0V [[COPY2]].sub_16bit, [[COPY1]].sub_16bit, [[COPY]].sub_16bit, [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg
- ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[COPY2]].sub_16bit, [[COPY1]].sub_16bit
- ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTDPBSSDV [[COPY2]].sub_16bit, [[COPY]].sub_16bit, [[COPY1]].sub_16bit, [[PTILEZEROV]], [[PT2RPNTLVWZ0V]].sub_t0, [[PT2RPNTLVWZ0V]].sub_t1
- ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2
- ; CHECK-NEXT: PTILESTOREDV [[COPY2]].sub_16bit, [[COPY1]].sub_16bit, [[MOV32ri64_2]], 1, [[MOV32ri64_1]], 0, $noreg, [[PTILEZEROV]]
- ; CHECK-NEXT: RET 0
- %2:gr32 = COPY $edx
- %1:gr32 = COPY $esi
- %0:gr32 = COPY $edi
- %14:vr512 = AVX512_512_SET0
- VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, %14 :: (store (s512) into %stack.0, align 4)
- MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4)
- PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4)
- %6:gr64 = MOV32ri64 @buf
- %7:gr64_nosp = MOV32ri64 32
- %8:tilepair = PT2RPNTLVWZ0V %0.sub_16bit, %1.sub_16bit, %2.sub_16bit, %6, 1, %7, 0, $noreg
- %12:tile = PTILEZEROV %0.sub_16bit, %1.sub_16bit
- %12:tile = PTDPBSSDV %0.sub_16bit, %2.sub_16bit, %1.sub_16bit, %12, %8.sub_t0, %8.sub_t1
- %13:gr64 = MOV32ri64 @buf2
- PTILESTOREDV %0.sub_16bit, %1.sub_16bit, %13, 1, %7, 0, $noreg, %12
- RET 0
-
-...
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_copy.mir b/llvm/test/CodeGen/X86/amx_tile_pair_copy.mir
deleted file mode 100644
index 66b15aa..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_copy.mir
+++ /dev/null
@@ -1,97 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \
-# RUN: -mattr=+amx-transpose -run-pass=lowertilecopy -o - %s | FileCheck %s
-
----
-name: test_tile_2rpntlvwz0
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
-hasWinCFI: false
-callsEHReturn: false
-callsUnwindInit: false
-hasEHContTarget: false
-hasEHScopes: false
-hasEHFunclets: false
-failsVerification: false
-tracksDebugUserValues: false
-registers: []
-liveins:
- - { reg: '$edi', virtual-reg: '' }
- - { reg: '$esi', virtual-reg: '' }
- - { reg: '$edx', virtual-reg: '' }
- - { reg: '$cx', virtual-reg: '' }
- - { reg: '$r9', virtual-reg: '' }
- - { reg: '$r10', virtual-reg: '' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 1024
- adjustsStack: false
- hasCalls: true
- stackProtector: ''
- functionContext: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- hasTailCall: false
- localFrameSize: 0
- savePoint: []
- restorePoint: []
-fixedStack: []
-stack:
- - { id: 43, name: '', type: default, offset: 0, size: 64, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 68, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-callSites: []
-debugValueSubstitutions: []
-constants: []
-machineFunctionInfo:
- amxProgModel: ManagedRA
-body: |
- bb.0.entry:
- liveins: $edi, $esi, $edx, $cx, $di, $r8w, $r11, $r10, $rbx, $r8, $r9
-
-
- ; CHECK-LABEL: name: test_tile_2rpntlvwz0
- ; CHECK: liveins: $edi, $esi, $edx, $cx, $di, $r8w, $r11, $r10, $rbx, $r8, $r9
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4)
- ; CHECK-NEXT: renamable $tmm4_tmm5 = PT2RPNTLVWZ0V killed renamable $cx, killed renamable $di, killed renamable $r8w, killed renamable $r11, 1, killed renamable $rbx, 0, $noreg
- ; CHECK-NEXT: $rax = MOV64ri 64
- ; CHECK-NEXT: TILESTORED %stack.3, 1, $rax, 0, $noreg, $tmm5 :: (store (s8192) into %stack.3)
- ; CHECK-NEXT: $tmm0 = TILELOADD %stack.3, 1, killed $rax, 0, $noreg :: (load (s8192) from %stack.3)
- ; CHECK-NEXT: $rax = MOV64ri 64
- ; CHECK-NEXT: TILESTORED %stack.2, 1, $rax, 0, $noreg, $tmm4 :: (store (s8192) into %stack.2)
- ; CHECK-NEXT: $tmm1 = TILELOADD %stack.2, 1, killed $rax, 0, $noreg :: (load (s8192) from %stack.2)
- ; CHECK-NEXT: renamable $r8 = MOV32ri64 64
- ; CHECK-NEXT: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $r8 :: (store (s64) into %stack.1)
- ; CHECK-NEXT: renamable $di = MOV16ri 64
- ; CHECK-NEXT: renamable $cx = MOV16ri 16
- ; CHECK-NEXT: PTILESTOREDV renamable $cx, renamable $di, killed renamable $r10, 1, renamable $r8, 0, $noreg, killed renamable $tmm1
- ; CHECK-NEXT: PTILESTOREDV killed renamable $cx, killed renamable $di, killed renamable $r9, 1, renamable $r8, 0, $noreg, killed renamable $tmm0
- PLDTILECFGV %stack.43, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.43, align 4)
- renamable $tmm4_tmm5 = PT2RPNTLVWZ0V killed renamable $cx, killed renamable $di, killed renamable $r8w, killed renamable $r11, 1, killed renamable $rbx, 0, $noreg
- renamable $tmm0 = COPY renamable $tmm5
- renamable $tmm1 = COPY renamable $tmm4, implicit killed $tmm4_tmm5
- renamable $r8 = MOV32ri64 64
- MOV64mr %stack.68, 1, $noreg, 0, $noreg, $r8 :: (store (s64) into %stack.68)
- renamable $di = MOV16ri 64
- renamable $cx = MOV16ri 16
- PTILESTOREDV renamable $cx, renamable $di, killed renamable $r10, 1, renamable $r8, 0, $noreg, killed renamable $tmm1
- PTILESTOREDV killed renamable $cx, killed renamable $di, killed renamable $r9, 1, renamable $r8, 0, $noreg, killed renamable $tmm0
-
-...
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
deleted file mode 100644
index 3549875..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
+++ /dev/null
@@ -1,87 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
- ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
- ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
-
- @buf = dso_local global [2048 x i8] zeroinitializer, align 16
-
- ; Function Attrs: noinline nounwind optnone uwtable
- define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1, ptr %m) #0 {
-; CHECK-LABEL: @test_tile_2rpntlvwz0(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i16 [[COL1:%.*]], 4
-; CHECK-NEXT: [[TMP1:%.*]] = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 [[ROW:%.*]], i16 [[COL0:%.*]], i16 [[COL1]], ptr @buf, i64 32) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { x86_amx, x86_amx } [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[COL0]] to i64
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL0]], ptr [[M:%.*]], i64 [[TMP3]], x86_amx [[TMP2]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { x86_amx, x86_amx } [[TMP1]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[COL1]] to i64
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL1]], ptr [[M]], i64 [[TMP6]], x86_amx [[TMP5]])
-; CHECK-NEXT: [[TMP8:%.*]] = call x86_amx @llvm.x86.tilezero.internal(i16 [[ROW]], i16 [[COL0]]) #[[ATTR3]]
-; CHECK-NEXT: [[TMP9:%.*]] = sext i16 [[COL0]] to i64
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL0]], ptr [[M]], i64 [[TMP9]], x86_amx [[TMP8]])
-; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[COL0]] to i64
-; CHECK-NEXT: [[TMP13:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL0]], ptr [[M]], i64 [[TMP11]])
-; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[COL1]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL1]], ptr [[M]], i64 [[TMP14]])
-; CHECK-NEXT: [[TMP17:%.*]] = sext i16 [[COL0]] to i64
-; CHECK-NEXT: [[TMP19:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[TMP0]], i16 [[COL0]], ptr [[M]], i64 [[TMP17]])
-; CHECK-NEXT: [[TMP20:%.*]] = call x86_amx @llvm.x86.tdpbssd.internal(i16 [[ROW]], i16 [[COL0]], i16 [[COL1]], x86_amx [[TMP13]], x86_amx [[TMP16]], x86_amx [[TMP19]]) #[[ATTR3]]
-; CHECK-NEXT: [[TMP21:%.*]] = sext i16 [[COL0]] to i64
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL0]], ptr [[M]], i64 [[TMP21]], x86_amx [[TMP20]])
-; CHECK-NEXT: ret void
-;
- entry:
-
- %0 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, ptr getelementptr inbounds ([2048 x i8], ptr @buf, i64 0, i64 0), i64 32) #7
- %1 = extractvalue { x86_amx, x86_amx } %0, 0
- %2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %1) #7
- store <256 x i32> %2, ptr %m, align 1024
-
- %3 = extractvalue { x86_amx, x86_amx } %0, 1
- %4 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %3) #7
- store <256 x i32> %4, ptr %m, align 1024
-
- %5 = call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #7
- %6 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %5) #7
- store <256 x i32> %6, ptr %m, align 64
-
- %7 = load <256 x i32>, ptr %m, align 64
- %8 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %7) #7
- %9 = load <256 x i32>, ptr %m, align 64
- %10 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %9) #7
- %11 = load <256 x i32>, ptr %m, align 64
- %12 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %11) #7
-
- %13 = call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col0, i16 %col1, x86_amx %8, x86_amx %10, x86_amx %12) #7
- %14 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %13) #7
- store <256 x i32> %14, ptr %m, align 64
-
- ret void
- }
-
- ; Function Attrs: argmemonly nounwind readonly
- declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, ptr, i64) #2
-
- ; Function Attrs: nounwind readnone
- declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #3
-
- ; Function Attrs: nounwind
- declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #4
-
- ; Function Attrs: nounwind
- declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #4
-
- ; Function Attrs: nounwind readnone
- declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #3
-
- ; Function Attrs: argmemonly nounwind writeonly
- declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) #5
-
- attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="8192" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose,+avx,+avx2,+avx512f,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+amx-tile,+amx-bf16,+avx512f,+amx-transpose" "tune-cpu"="generic" }
- attributes #1 = { argmemonly nofree nounwind willreturn writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #2 = { argmemonly nounwind readonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #3 = { nounwind readnone "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #4 = { nounwind "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #5 = { argmemonly nounwind writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #6 = { argmemonly nofree nounwind willreturn "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #7 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
deleted file mode 100644
index 96966264..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
-
- @buf = dso_local global [2048 x i8] zeroinitializer, align 16
- @buf2 = dso_local global [2048 x i8] zeroinitializer, align 16
-
- ; Function Attrs: nounwind uwtable
- define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1) local_unnamed_addr #0 {
-; CHECK-LABEL: @test_tile_2rpntlvwz0(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 [[ROW:%.*]], i16 [[COL0:%.*]], i16 [[COL1:%.*]], ptr @buf, i64 32) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { x86_amx, x86_amx } [[TMP0]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { x86_amx, x86_amx } [[TMP0]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_amx @llvm.x86.tilezero.internal(i16 [[ROW]], i16 [[COL0]]) #[[ATTR3]]
-; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 [[ROW]], i16 [[COL1]], i16 [[COL0]], x86_amx [[TMP3]], x86_amx [[TMP1]], x86_amx [[TMP2]]) #[[ATTR3]]
-; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL0]], ptr @buf2, i64 32, x86_amx [[TMP4]]) #[[ATTR3]]
-; CHECK-NEXT: ret void
-;
- entry:
- %0 = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, ptr @buf, i64 32) #5
- %1 = extractvalue { x86_amx, x86_amx } %0, 0
- %2 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %1) #5
- %3 = extractvalue { x86_amx, x86_amx } %0, 1
- %4 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %3) #5
- %5 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #5
- %6 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %5) #5
- %7 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %6) #5
- %8 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %2) #5
- %9 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %4) #5
- %10 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col1, i16 %col0, x86_amx %7, x86_amx %8, x86_amx %9) #5
- %11 = tail call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %10) #5
- %12 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %11) #5
- tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col0, ptr @buf2, i64 32, x86_amx %12) #5
- ret void
- }
-
- ; Function Attrs: argmemonly nounwind readonly
- declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, ptr, i64) #1
-
- ; Function Attrs: nounwind readnone
- declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #2
-
- ; Function Attrs: nounwind
- declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #3
-
- ; Function Attrs: nounwind
- declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #3
-
- ; Function Attrs: nounwind readnone
- declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #2
-
- ; Function Attrs: argmemonly nounwind writeonly
- declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) #4
-
- attributes #0 = { nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="8192" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose,+avx,+avx2,+avx512f,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+amx-tile,+amx-bf16,+avx512f,+amx-transpose" "tune-cpu"="generic" }
- attributes #1 = { argmemonly nounwind readonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #2 = { nounwind readnone "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #3 = { nounwind "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #4 = { argmemonly nounwind writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" }
- attributes #5 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir b/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir
deleted file mode 100644
index 1e3b242..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir
+++ /dev/null
@@ -1,134 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \
-# RUN: -mattr=+amx-transpose -run-pass=fastpretileconfig -o - %s | FileCheck %s
-
----
-name: test_tile_2rpntlvwz0
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
-hasWinCFI: false
-callsEHReturn: false
-callsUnwindInit: false
-hasEHContTarget: false
-hasEHScopes: false
-hasEHFunclets: false
-failsVerification: false
-tracksDebugUserValues: false
-registers:
- - { id: 0, class: gr64_nosp, preferred-register: '' }
- - { id: 1, class: gr16, preferred-register: '' }
- - { id: 2, class: gr16, preferred-register: '' }
- - { id: 3, class: gr16, preferred-register: '' }
- - { id: 4, class: gr64, preferred-register: '' }
- - { id: 5, class: gr64, preferred-register: '' }
- - { id: 6, class: gr64, preferred-register: '' }
- - { id: 7, class: gr64_nosp, preferred-register: '' }
- - { id: 8, class: tilepair, preferred-register: '' }
- - { id: 9, class: tile, preferred-register: '' }
- - { id: 10, class: tile, preferred-register: '' }
- - { id: 11, class: tile, preferred-register: '' }
- - { id: 181, class: tile, preferred-register: '' }
- - { id: 183, class: tile, preferred-register: '' }
- - { id: 185, class: tile, preferred-register: '' }
- - { id: 186, class: tile, preferred-register: '' }
-liveins:
- - { reg: '$edi', virtual-reg: '%0' }
- - { reg: '$esi', virtual-reg: '%1' }
- - { reg: '$edx', virtual-reg: '%2' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 1024
- adjustsStack: false
- hasCalls: true
- stackProtector: ''
- functionContext: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- hasTailCall: false
- localFrameSize: 0
- savePoint: []
- restorePoint: []
-fixedStack: []
-stack:
- - { id: 18, name: '', type: default, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 19, name: '', type: default, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 20, name: '', type: default, offset: 0, size: 8, alignment: 8,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 21, name: '', type: default, offset: 0, size: 8,
- alignment: 8, stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-callSites: []
-debugValueSubstitutions: []
-constants: []
-machineFunctionInfo:
- amxProgModel: ManagedRA
-body: |
- bb.0.entry:
- liveins: $rdi, $rsi, $rdx, $rax
-
- ; CHECK-LABEL: name: test_tile_2rpntlvwz0
- ; CHECK: liveins: $rdi, $rsi, $rdx, $rax
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0
- ; CHECK-NEXT: VMOVUPSZmr %stack.4, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.4, align 4)
- ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4)
- ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 64
- ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64
- ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16
- ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 16
- ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdx
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64_nosp = COPY $rax
- ; CHECK-NEXT: [[PT2RPNTLVWZ0V:%[0-9]+]]:tilepair = PT2RPNTLVWZ0V [[MOV16ri]], [[MOV16ri1]], [[MOV16ri2]], [[COPY2]], 1, killed [[COPY3]], 0, $noreg
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:tile = COPY [[PT2RPNTLVWZ0V]].sub_t1
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:tile = COPY [[PT2RPNTLVWZ0V]].sub_t0
- ; CHECK-NEXT: PTILESTOREDV [[MOV16ri]], [[MOV16ri1]], [[COPY]], 1, [[MOV32ri64_]], 0, $noreg, killed [[COPY5]]
- ; CHECK-NEXT: PTILESTOREDV [[MOV16ri]], [[MOV16ri2]], [[COPY1]], 1, [[MOV32ri64_]], 0, $noreg, killed [[COPY4]]
- ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri]], [[MOV16ri1]]
- ; CHECK-NEXT: PTILESTOREDV [[MOV16ri]], [[MOV16ri1]], [[COPY2]], 1, [[MOV32ri64_]], 0, $noreg, killed [[PTILEZEROV]]
- ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[MOV16ri1]], [[COPY]], 1, [[MOV32ri64_]], 0, $noreg
- ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[MOV16ri2]], [[COPY1]], 1, [[MOV32ri64_]], 0, $noreg
- ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[MOV16ri1]], [[COPY2]], 1, [[MOV32ri64_]], 0, $noreg
- ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[MOV16ri]], [[MOV16ri1]], [[MOV16ri2]], [[PTILELOADDV]], killed [[PTILELOADDV1]], killed [[PTILELOADDV2]]
- ; CHECK-NEXT: PTILESTOREDV killed [[MOV16ri]], killed [[MOV16ri1]], killed [[COPY2]], 1, killed [[MOV32ri64_]], 0, $noreg, killed [[PTDPBSSDV]]
- %0:gr64_nosp = MOV32ri64 64
- %1:gr16 = MOV16ri 64
- %2:gr16 = MOV16ri 16
- %3:gr16 = MOV16ri 16
- %4:gr64 = COPY $rsi
- %5:gr64 = COPY $rdi
- %6:gr64 = COPY $rdx
- %7:gr64_nosp = COPY $rax
- %8:tilepair = PT2RPNTLVWZ0V %1, %2, %3, %6, 1, killed %7, 0, $noreg
- %9:tile = COPY %8.sub_t1
- %10:tile = COPY %8.sub_t0
- PTILESTOREDV %1, %2, %4, 1, %0, 0, $noreg, killed %10
- PTILESTOREDV %1, %3, %5, 1, %0, 0, $noreg, killed %9
- %11:tile = PTILEZEROV %1, %2
- PTILESTOREDV %1, %2, %6, 1, %0, 0, $noreg, killed %11
- %181:tile = PTILELOADDV %1, %2, %4, 1, %0, 0, $noreg
- %183:tile = PTILELOADDV %1, %3, %5, 1, %0, 0, $noreg
- %185:tile = PTILELOADDV %1, %2, %6, 1, %0, 0, $noreg
- %186:tile = PTDPBSSDV %1, %2, %3, %181, killed %183, killed %185
- PTILESTOREDV killed %1, killed %2, killed %6, 1, killed %0, 0, $noreg, killed %186
-...
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir b/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir
deleted file mode 100644
index ac2cdb4..0000000
--- a/llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir
+++ /dev/null
@@ -1,113 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \
-# RUN: -mattr=+amx-transpose -run-pass=tilepreconfig -o - %s | FileCheck %s
-
----
-name: test_tile_2rpntlvwz0
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
-hasWinCFI: false
-callsEHReturn: false
-callsUnwindInit: false
-hasEHContTarget: false
-hasEHScopes: false
-hasEHFunclets: false
-failsVerification: false
-tracksDebugUserValues: false
-registers:
- - { id: 0, class: gr32, preferred-register: '' }
- - { id: 1, class: gr32, preferred-register: '' }
- - { id: 2, class: gr32, preferred-register: '' }
- - { id: 3, class: gr16, preferred-register: '' }
- - { id: 4, class: gr16, preferred-register: '' }
- - { id: 5, class: gr16, preferred-register: '' }
- - { id: 6, class: gr64, preferred-register: '' }
- - { id: 7, class: gr64_nosp, preferred-register: '' }
- - { id: 8, class: tilepair, preferred-register: '' }
- - { id: 9, class: tile, preferred-register: '' }
- - { id: 10, class: tile, preferred-register: '' }
- - { id: 11, class: tile, preferred-register: '' }
- - { id: 12, class: tile, preferred-register: '' }
- - { id: 13, class: gr64, preferred-register: '' }
-liveins:
- - { reg: '$edi', virtual-reg: '%0' }
- - { reg: '$esi', virtual-reg: '%1' }
- - { reg: '$edx', virtual-reg: '%2' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 1
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- functionContext: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- hasTailCall: false
- localFrameSize: 0
- savePoint: []
- restorePoint: []
-fixedStack: []
-stack: []
-callSites: []
-debugValueSubstitutions: []
-constants: []
-machineFunctionInfo:
- amxProgModel: ManagedRA
-body: |
- bb.0.entry:
- liveins: $edi, $esi, $edx, $rax, $rbx
-
- ; CHECK-LABEL: name: test_tile_2rpntlvwz0
- ; CHECK: liveins: $edi, $esi, $edx, $rax, $rbx
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0
- ; CHECK-NEXT: VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.0, align 4)
- ; CHECK-NEXT: MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[COPY1]].sub_16bit
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit
- ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.0, align 4)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
- ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32
- ; CHECK-NEXT: [[PT2RPNTLVWZ0V:%[0-9]+]]:tilepair = PT2RPNTLVWZ0V [[COPY5]], [[COPY4]], [[COPY3]], killed [[COPY6]], 1, [[MOV32ri64_]], 0, $noreg
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:tile = COPY [[PT2RPNTLVWZ0V]].sub_t1
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:tile = COPY [[PT2RPNTLVWZ0V]].sub_t0
- ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[COPY5]], [[COPY4]]
- ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[COPY5]], [[COPY3]], [[COPY4]], [[PTILEZEROV]], killed [[COPY8]], killed [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY $rbx
- ; CHECK-NEXT: PTILESTOREDV [[COPY5]], [[COPY4]], killed [[COPY9]], 1, [[MOV32ri64_]], 0, $noreg, killed [[PTDPBSSDV]]
- ; CHECK-NEXT: RET 0
- %2:gr32 = COPY $edx
- %1:gr32 = COPY $esi
- %0:gr32 = COPY $edi
- %3:gr16 = COPY %2.sub_16bit
- %4:gr16 = COPY %1.sub_16bit
- %5:gr16 = COPY %0.sub_16bit
- %6:gr64 = COPY $rax
- %7:gr64_nosp = MOV32ri64 32
- %8:tilepair = PT2RPNTLVWZ0V %5, %4, %3, killed %6, 1, %7, 0, $noreg
- %9:tile = COPY %8.sub_t1
- %10:tile = COPY %8.sub_t0
- %11:tile = PTILEZEROV %5, %4
- %12:tile = PTDPBSSDV %5, %3, %4, %11, killed %10, killed %9
- %13:gr64 = COPY $rbx
- PTILESTOREDV %5, %4, killed %13, 1, %7, 0, $noreg, killed %12
- RET 0
-
-...
diff --git a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll
deleted file mode 100644
index 4cfd97a..0000000
--- a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll
+++ /dev/null
@@ -1,371 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
-
-define void @test_amx(i32 %rv32, i64 %stride, i64 %rvalue, i8* %addr1, <4 x float> %xmm) #0 {
-; CHECK-LABEL: test_amx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: t2rpntlvwz0 (%rcx,%rsi), %tmm0
-; CHECK-NEXT: t2rpntlvwz0t1 (%rcx,%rsi), %tmm2
-; CHECK-NEXT: t2rpntlvwz1 (%rcx,%rsi), %tmm0
-; CHECK-NEXT: t2rpntlvwz1t1 (%rcx,%rsi), %tmm2
-; CHECK-NEXT: ttransposed %tmm3, %tmm1
-; CHECK-NEXT: ttdpbf16ps %tmm3, %tmm2, %tmm1
-; CHECK-NEXT: ttdpfp16ps %tmm6, %tmm5, %tmm4
-; CHECK-NEXT: ttcmmimfp16ps %tmm3, %tmm2, %tmm1
-; CHECK-NEXT: ttcmmrlfp16ps %tmm3, %tmm2, %tmm1
-; CHECK-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1
-; CHECK-NEXT: tconjtfp16 %tmm2, %tmm1
-; CHECK-NEXT: retq
-;
-; EGPR-LABEL: test_amx:
-; EGPR: # %bb.0:
-; EGPR-NEXT: t2rpntlvwz0 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x04,0x31]
-; EGPR-NEXT: t2rpntlvwz0t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x31]
-; EGPR-NEXT: t2rpntlvwz1 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x04,0x31]
-; EGPR-NEXT: t2rpntlvwz1t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x31]
-; EGPR-NEXT: ttransposed %tmm3, %tmm1 # encoding: [0xc4,0xe2,0x7a,0x5f,0xcb]
-; EGPR-NEXT: ttdpbf16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6c,0xca]
-; EGPR-NEXT: ttdpfp16ps %tmm6, %tmm5, %tmm4 # encoding: [0xc4,0xe2,0x4b,0x6c,0xe5]
-; EGPR-NEXT: ttcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x63,0x6b,0xca]
-; EGPR-NEXT: ttcmmrlfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6b,0xca]
-; EGPR-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x60,0x6b,0xca]
-; EGPR-NEXT: tconjtfp16 %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x79,0x6b,0xca]
-; EGPR-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.t2rpntlvwz0(i8 1, i8* %addr1, i64 %stride)
- call void @llvm.x86.t2rpntlvwz0t1(i8 2, i8* %addr1, i64 %stride)
- call void @llvm.x86.t2rpntlvwz1(i8 1, i8* %addr1, i64 %stride)
- call void @llvm.x86.t2rpntlvwz1t1(i8 2, i8* %addr1, i64 %stride)
- call void @llvm.x86.ttransposed(i8 1, i8 3)
- call void @llvm.x86.ttdpbf16ps(i8 1, i8 2, i8 3)
- call void @llvm.x86.ttdpfp16ps(i8 4, i8 5, i8 6)
- call void @llvm.x86.ttcmmimfp16ps(i8 1, i8 2, i8 3)
- call void @llvm.x86.ttcmmrlfp16ps(i8 1, i8 2, i8 3)
- call void @llvm.x86.tconjtcmmimfp16ps(i8 1, i8 2, i8 3)
- call void @llvm.x86.tconjtfp16(i8 1, i8 2)
- ret void
-}
-
-declare void @llvm.x86.t2rpntlvwz0(i8 %tile1, i8* %addr1, i64 %stride)
-declare void @llvm.x86.t2rpntlvwz0t1(i8 %tile1, i8* %addr1, i64 %stride)
-declare void @llvm.x86.t2rpntlvwz1(i8 %tile1, i8* %addr1, i64 %stride)
-declare void @llvm.x86.t2rpntlvwz1t1(i8 %tile1, i8* %addr1, i64 %stride)
-declare void @llvm.x86.ttransposed(i8 %tile0, i8 %tile1)
-declare void @llvm.x86.ttdpbf16ps(i8 %tile0, i8 %tile1, i8 %tile2)
-declare void @llvm.x86.ttdpfp16ps(i8 %tile0, i8 %tile1, i8 %tile2)
-declare void @llvm.x86.ttcmmimfp16ps(i8 %A, i8 %B, i8 %C)
-declare void @llvm.x86.ttcmmrlfp16ps(i8 %A, i8 %B, i8 %C)
-declare void @llvm.x86.tconjtcmmimfp16ps(i8 %A, i8 %B, i8 %C)
-declare void @llvm.x86.tconjtfp16(i8 %A, i8 %B)
-
-define void @test_amx2(i8* %pointer, i8* %base, i64 %stride) #0 {
-; CHECK-LABEL: test_amx2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: subq $2928, %rsp # imm = 0xB70
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, %ax
-; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm0
-; CHECK-NEXT: tilezero %tmm1
-; CHECK-NEXT: tilezero %tmm2
-; CHECK-NEXT: ttdpbf16ps %tmm1, %tmm0, %tmm2
-; CHECK-NEXT: ttdpfp16ps %tmm1, %tmm0, %tmm2
-; CHECK-NEXT: ttcmmimfp16ps %tmm1, %tmm0, %tmm2
-; CHECK-NEXT: ttcmmrlfp16ps %tmm1, %tmm0, %tmm2
-; CHECK-NEXT: movabsq $64, %rbp
-; CHECK-NEXT: tilestored %tmm2, 896(%rsp,%rbp) # 1024-byte Folded Spill
-; CHECK-NEXT: tileloadd 896(%rsp,%rbp), %tmm3 # 1024-byte Folded Reload
-; CHECK-NEXT: tconjtcmmimfp16ps %tmm1, %tmm0, %tmm3
-; CHECK-NEXT: tconjtfp16 %tmm3, %tmm0
-; CHECK-NEXT: tilestored %tmm2, (%rdi,%rdx)
-; CHECK-NEXT: addq $2928, %rsp # imm = 0xB70
-; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: tilerelease
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-;
-; EGPR-LABEL: test_amx2:
-; EGPR: # %bb.0:
-; EGPR-NEXT: pushq %rbp # encoding: [0x55]
-; EGPR-NEXT: subq $2928, %rsp # encoding: [0x48,0x81,0xec,0x70,0x0b,0x00,0x00]
-; EGPR-NEXT: # imm = 0xB70
-; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
-; EGPR-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0d]
-; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x40,0x03,0x00,0x00,0x01]
-; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x70,0x03,0x00,0x00,0x08]
-; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x50,0x03,0x00,0x00,0x08,0x00]
-; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x71,0x03,0x00,0x00,0x08]
-; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x52,0x03,0x00,0x00,0x08,0x00]
-; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x72,0x03,0x00,0x00,0x08]
-; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x54,0x03,0x00,0x00,0x08,0x00]
-; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x73,0x03,0x00,0x00,0x08]
-; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x56,0x03,0x00,0x00,0x08,0x00]
-; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0x40,0x03,0x00,0x00]
-; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
-; EGPR-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16]
-; EGPR-NEXT: tilezero %tmm1 # encoding: [0xc4,0xe2,0x7b,0x49,0xc8]
-; EGPR-NEXT: tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0]
-; EGPR-NEXT: ttdpbf16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6c,0xd0]
-; EGPR-NEXT: ttdpfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6c,0xd0]
-; EGPR-NEXT: ttcmmimfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6b,0xd0]
-; EGPR-NEXT: ttcmmrlfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6b,0xd0]
-; EGPR-NEXT: movabsq $64, %rbp # encoding: [0x48,0xbd,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
-; EGPR-NEXT: tilestored %tmm2, 896(%rsp,%rbp) # 1024-byte Folded Spill
-; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x94,0x2c,0x80,0x03,0x00,0x00]
-; EGPR-NEXT: tileloadd 896(%rsp,%rbp), %tmm3 # 1024-byte Folded Reload
-; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x2c,0x80,0x03,0x00,0x00]
-; EGPR-NEXT: tconjtcmmimfp16ps %tmm1, %tmm0, %tmm3 # encoding: [0xc4,0xe2,0x70,0x6b,0xd8]
-; EGPR-NEXT: tconjtfp16 %tmm3, %tmm0 # encoding: [0xc4,0xe2,0x79,0x6b,0xc3]
-; EGPR-NEXT: tilestored %tmm2, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x14,0x17]
-; EGPR-NEXT: addq $2928, %rsp # encoding: [0x48,0x81,0xc4,0x70,0x0b,0x00,0x00]
-; EGPR-NEXT: # imm = 0xB70
-; EGPR-NEXT: popq %rbp # encoding: [0x5d]
-; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; EGPR-NEXT: retq # encoding: [0xc3]
-
- %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
- %b = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
- %c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
- %c1 = call x86_amx @llvm.x86.ttdpbf16ps.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b)
- %c2 = call x86_amx @llvm.x86.ttdpfp16ps.internal(i16 8, i16 8, i16 8, x86_amx %c1, x86_amx %a, x86_amx %b)
- %c3 = call x86_amx @llvm.x86.ttcmmimfp16ps.internal(i16 8, i16 8, i16 8, x86_amx %c2, x86_amx %a, x86_amx %b)
- %c4 = call x86_amx @llvm.x86.ttcmmrlfp16ps.internal(i16 8, i16 8, i16 8, x86_amx %c3, x86_amx %a, x86_amx %b)
- %c5 = call x86_amx @llvm.x86.tconjtcmmimfp16ps.internal(i16 8, i16 8, i16 8, x86_amx %c4, x86_amx %a, x86_amx %b)
- %c6 = call x86_amx @llvm.x86.tconjtfp16.internal(i16 8, i16 8, x86_amx %c5)
-
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %c4)
- ret void
-}
-
-define void @test_amx3(i8* %pointer, i8* %base, i64 %stride) #0 {
-; CHECK-LABEL: test_amx3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movw $8, %cx
-; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4
-; CHECK-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm4
-; CHECK-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm4
-; CHECK-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm4
-; CHECK-NEXT: ttransposed %tmm4, %tmm0
-; CHECK-NEXT: tilestored %tmm0, (%rdi,%rdx)
-; CHECK-NEXT: tilerelease
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-;
-; EGPR-LABEL: test_amx3:
-; EGPR: # %bb.0:
-; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
-; EGPR-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xff]
-; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf0,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd0,0x08,0x00]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
-; EGPR-NEXT: movb $0, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x00]
-; EGPR-NEXT: movw $0, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x00,0x00]
-; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
-; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; EGPR-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
-; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16]
-; EGPR-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x24,0x16]
-; EGPR-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x24,0x16]
-; EGPR-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x24,0x16]
-; EGPR-NEXT: ttransposed %tmm4, %tmm0 # encoding: [0xc4,0xe2,0x7a,0x5f,0xc4]
-; EGPR-NEXT: tilestored %tmm0, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x17]
-; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; EGPR-NEXT: retq # encoding: [0xc3]
- %1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
- %2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
- %3 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
- %4 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
- %5 = extractvalue { x86_amx, x86_amx } %4, 0
- %6 = call x86_amx @llvm.x86.ttransposed.internal(i16 8, i16 8, x86_amx %5)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %6)
- ret void
-}
-
-define void @test_amx_spill(i8* %pointer, i8* %base, i64 %stride) #0 {
-; CHECK-LABEL: test_amx_spill:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $6088, %rsp # imm = 0x17C8
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, %ax
-; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm0
-; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4
-; CHECK-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6
-; CHECK-NEXT: movabsq $64, %rcx
-; CHECK-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill
-; CHECK-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill
-; CHECK-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6
-; CHECK-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill
-; CHECK-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill
-; CHECK-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6
-; CHECK-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill
-; CHECK-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill
-; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6
-; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx)
-; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx)
-; CHECK-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; CHECK-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx)
-; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx)
-; CHECK-NEXT: tileloadd 1984(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; CHECK-NEXT: tileloadd 3008(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx)
-; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx)
-; CHECK-NEXT: tileloadd -64(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; CHECK-NEXT: tileloadd 960(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx)
-; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx)
-; CHECK-NEXT: tilestored %tmm6, (%rsi,%rdx)
-; CHECK-NEXT: tilestored %tmm7, (%rsi,%rdx)
-; CHECK-NEXT: addq $6088, %rsp # imm = 0x17C8
-; CHECK-NEXT: tilerelease
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-;
-; EGPR-LABEL: test_amx_spill:
-; EGPR: # %bb.0:
-; EGPR-NEXT: subq $6088, %rsp # encoding: [0x48,0x81,0xec,0xc8,0x17,0x00,0x00]
-; EGPR-NEXT: # imm = 0x17C8
-; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
-; EGPR-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xfe]
-; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x80,0x01]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb0,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x90,0x08,0x00]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb4,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x98,0x08,0x00]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb5,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9a,0x08,0x00]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb6,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9c,0x08,0x00]
-; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb7,0x08]
-; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9e,0x08,0x00]
-; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x80]
-; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
-; EGPR-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16]
-; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16]
-; EGPR-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x34,0x16]
-; EGPR-NEXT: movabsq $64, %rcx # encoding: [0x48,0xb9,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
-; EGPR-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x0f,0x00,0x00]
-; EGPR-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x13,0x00,0x00]
-; EGPR-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x34,0x16]
-; EGPR-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x07,0x00,0x00]
-; EGPR-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x0b,0x00,0x00]
-; EGPR-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x34,0x16]
-; EGPR-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0x74,0x0c,0xc0]
-; EGPR-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x03,0x00,0x00]
-; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x34,0x16]
-; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
-; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
-; EGPR-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x0f,0x00,0x00]
-; EGPR-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x13,0x00,0x00]
-; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
-; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
-; EGPR-NEXT: tileloadd 1984(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x07,0x00,0x00]
-; EGPR-NEXT: tileloadd 3008(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x0b,0x00,0x00]
-; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
-; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
-; EGPR-NEXT: tileloadd -64(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x0c,0xc0]
-; EGPR-NEXT: tileloadd 960(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x03,0x00,0x00]
-; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
-; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
-; EGPR-NEXT: tilestored %tmm6, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x34,0x16]
-; EGPR-NEXT: tilestored %tmm7, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x3c,0x16]
-; EGPR-NEXT: addq $6088, %rsp # encoding: [0x48,0x81,0xc4,0xc8,0x17,0x00,0x00]
-; EGPR-NEXT: # imm = 0x17C8
-; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; EGPR-NEXT: retq # encoding: [0xc3]
- %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
- %b1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- %b2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- %b3 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- %b4 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- %b5 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
- %e11 = extractvalue { x86_amx, x86_amx } %b1, 0
- %e12 = extractvalue { x86_amx, x86_amx } %b1, 1
- %e21 = extractvalue { x86_amx, x86_amx } %b2, 0
- %e22 = extractvalue { x86_amx, x86_amx } %b2, 1
- %e31 = extractvalue { x86_amx, x86_amx } %b3, 0
- %e32 = extractvalue { x86_amx, x86_amx } %b3, 1
- %e41 = extractvalue { x86_amx, x86_amx } %b4, 0
- %e42 = extractvalue { x86_amx, x86_amx } %b4, 1
- %e51 = extractvalue { x86_amx, x86_amx } %b5, 0
- %e52 = extractvalue { x86_amx, x86_amx } %b5, 1
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e11)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e12)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e21)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e22)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e31)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e32)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e41)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e42)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e51)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %base, i64 %stride, x86_amx %e52)
- ret void
-}
-
-declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
-declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, i8*, i64)
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16, i16, i16, i8*, i64)
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16, i16, i16, i8*, i64)
-declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal(i16, i16, i16, i8*, i64)
-declare x86_amx @llvm.x86.ttransposed.internal(i16, i16, x86_amx)
-declare x86_amx @llvm.x86.ttdpbf16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-declare x86_amx @llvm.x86.ttdpfp16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-declare x86_amx @llvm.x86.ttcmmimfp16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-declare x86_amx @llvm.x86.ttcmmrlfp16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-declare x86_amx @llvm.x86.tconjtcmmimfp16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-declare x86_amx @llvm.x86.tconjtfp16.internal(i16, i16, x86_amx)
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll
new file mode 100644
index 0000000..293b48d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll
@@ -0,0 +1,39 @@
+;; BB section test with basic block hashes.
+
+;; Basic block sections profile with BB hashes.
+; RUN: echo 'v1' > %t
+; RUN: echo 'f foo' >> %t
+; RUN: echo 'g 0:10,1:9,2:1 1:8,3:8 2:2,3:2 3:11' >> %t
+; RUN: echo 'c 0 2 3' >> %t
+; RUN: echo 'h 0:64863A11B5CA0000 1:54F1E80D6B270006 2:54F1F4E66B270008 3:C8BC6041A2CB0009' >> %t
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck %s
+;
+define void @foo(i1 zeroext) nounwind {
+ %2 = alloca i8, align 1
+ %3 = zext i1 %0 to i8
+ store i8 %3, ptr %2, align 1
+ %4 = load i8, ptr %2, align 1
+ %5 = trunc i8 %4 to i1
+ br i1 %5, label %6, label %8
+
+6: ; preds = %1
+ %7 = call i32 @bar()
+ br label %10
+
+8: ; preds = %1
+ %9 = call i32 @baz()
+ br label %10
+
+10: ; preds = %8, %6
+ ret void
+}
+
+declare i32 @bar() #1
+
+declare i32 @baz() #1
+
+; CHECK: .section .text.foo,"ax",@progbits
+; CHECK: callq baz
+; CHECK: retq
+; CHECK: .section .text.split.foo,"ax",@progbits
+; CHECK: callq bar
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll
index 751ab76..eb0a14b 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll
@@ -69,6 +69,20 @@
; RUN: echo 'g 0:4,1:2:3' >> %t15
; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t15 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR15
; CHECK-ERROR15: LLVM ERROR: invalid profile {{.*}} at line 4: unsigned integer expected: '2:3'
+; RUN: echo 'v1' > %t16
+; RUN: echo 'f dummy1' >> %t16
+; RUN: echo 'c 0 1' >> %t16
+; RUN: echo 'g 0:4,1:2' >> %t16
+; RUN: echo 'h a:1111111111111111 1:ffffffffffffffff' >> %t16
+; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t16 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR16
+; CHECK-ERROR16: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected: 'a'
+; RUN: echo 'v1' > %t17
+; RUN: echo 'f dummy1' >> %t17
+; RUN: echo 'c 0 1' >> %t17
+; RUN: echo 'g 0:4,1:2' >> %t17
+; RUN: echo 'h 0:111111111111111g 1:ffffffffffffffff' >> %t17
+; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t17 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR17
+; CHECK-ERROR17: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected in hex format: '111111111111111g'
define i32 @dummy1(i32 %x, i32 %y, i32 %z) {
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index 19d751d1..8007d9d 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -7025,3 +7025,279 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {
%cmp = icmp ne i4096 %test, 0
ret i1 %cmp
}
+
+; Special Cases
+
+; Multiple uses of the stored value
+define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_cmpz_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %esi
+; X86-NEXT: movl 36(%esp,%esi), %eax
+; X86-NEXT: movl 40(%esp,%esi), %edi
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl 32(%esp,%esi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esp,%esi), %esi
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: xorl 12(%ecx), %esi
+; X86-NEXT: xorl 8(%ecx), %edx
+; X86-NEXT: xorl 4(%ecx), %eax
+; X86-NEXT: xorl (%ecx), %edi
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
+; X86-NEXT: movl %edi, (%ecx)
+; X86-NEXT: movl %eax, 4(%ecx)
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: complement_cmpz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %eax
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: cmovneq %rsi, %rax
+; SSE-NEXT: xorq 8(%rdi), %rdx
+; SSE-NEXT: xorq (%rdi), %rax
+; SSE-NEXT: movq %rax, (%rdi)
+; SSE-NEXT: movq %rdx, 8(%rdi)
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: complement_cmpz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: movl $1, %eax
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: shldq %cl, %rax, %rdx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shlxq %rcx, %rax, %rax
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %rax, %rdx
+; AVX2-NEXT: cmovneq %rsi, %rax
+; AVX2-NEXT: xorq 8(%rdi), %rdx
+; AVX2-NEXT: xorq (%rdi), %rax
+; AVX2-NEXT: movq %rax, (%rdi)
+; AVX2-NEXT: movq %rdx, 8(%rdi)
+; AVX2-NEXT: orq %rdx, %rax
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: complement_cmpz_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: movl $1, %edx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %rdx, %rsi
+; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %rdx, %rsi
+; AVX512-NEXT: cmovneq %rax, %rdx
+; AVX512-NEXT: xorq 8(%rdi), %rsi
+; AVX512-NEXT: xorq (%rdi), %rdx
+; AVX512-NEXT: movq %rdx, (%rdi)
+; AVX512-NEXT: movq %rsi, 8(%rdi)
+; AVX512-NEXT: orq %rsi, %rdx
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %ld = load i128, ptr %word
+ %res = xor i128 %ld, %bit
+ store i128 %res, ptr %word
+ %cmp = icmp ne i128 %res, 0
+ ret i1 %cmp
+}
+
+; Multiple loads in store chain
+define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
+; X86-LABEL: reset_multiload_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $80, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl 56(%esp,%eax), %esi
+; X86-NEXT: movl 60(%esp,%eax), %edx
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%esp,%eax), %edi
+; X86-NEXT: movl 52(%esp,%eax), %eax
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl 8(%ebx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %ecx
+; X86-NEXT: movl (%ebx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edi, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: movl 12(%ebx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl 4(%ebx), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: notl %ecx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: notl %ebx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: notl %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl %ebx, 8(%esi)
+; X86-NEXT: movl %ecx, 12(%esi)
+; X86-NEXT: movl %edi, (%esi)
+; X86-NEXT: movl %edx, 4(%esi)
+; X86-NEXT: je .LBB22_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB22_2:
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: reset_multiload_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %esi
+; SSE-NEXT: xorl %r8d, %r8d
+; SSE-NEXT: shldq %cl, %rsi, %r8
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: shlq %cl, %rsi
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rsi, %r8
+; SSE-NEXT: cmovneq %rax, %rsi
+; SSE-NEXT: movq (%rdi), %rcx
+; SSE-NEXT: movq 8(%rdi), %r9
+; SSE-NEXT: movq %r9, %r10
+; SSE-NEXT: andq %r8, %r10
+; SSE-NEXT: notq %r8
+; SSE-NEXT: movq %rcx, %r11
+; SSE-NEXT: andq %rsi, %r11
+; SSE-NEXT: notq %rsi
+; SSE-NEXT: andq %r9, %r8
+; SSE-NEXT: andq %rcx, %rsi
+; SSE-NEXT: orq %r10, %r11
+; SSE-NEXT: jne .LBB22_2
+; SSE-NEXT: # %bb.1:
+; SSE-NEXT: movl (%rdx), %eax
+; SSE-NEXT: .LBB22_2:
+; SSE-NEXT: movq %rsi, (%rdi)
+; SSE-NEXT: movq %r8, 8(%rdi)
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reset_multiload_i128:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: movl $1, %esi
+; AVX-NEXT: xorl %r8d, %r8d
+; AVX-NEXT: shldq %cl, %rsi, %r8
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: shlxq %rcx, %rsi, %r9
+; AVX-NEXT: testb $64, %cl
+; AVX-NEXT: cmovneq %r9, %r8
+; AVX-NEXT: cmovneq %rax, %r9
+; AVX-NEXT: movq (%rdi), %r10
+; AVX-NEXT: movq 8(%rdi), %r11
+; AVX-NEXT: andnq %r11, %r8, %rcx
+; AVX-NEXT: andq %r8, %r11
+; AVX-NEXT: andnq %r10, %r9, %rsi
+; AVX-NEXT: andq %r9, %r10
+; AVX-NEXT: orq %r11, %r10
+; AVX-NEXT: jne .LBB22_2
+; AVX-NEXT: # %bb.1:
+; AVX-NEXT: movl (%rdx), %eax
+; AVX-NEXT: .LBB22_2:
+; AVX-NEXT: movq %rsi, (%rdi)
+; AVX-NEXT: movq %rcx, 8(%rdi)
+; AVX-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %mask = xor i128 %bit, -1
+ %ld = load i128, ptr %word
+ %sel = load i32, ptr %p
+ %test = and i128 %ld, %bit
+ %res = and i128 %ld, %mask
+ %cmp = icmp eq i128 %test, 0
+ store i128 %res, ptr %word
+ %ret = select i1 %cmp, i32 %sel, i32 0
+ ret i32 %ret
+}
diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
index e73ff79..f270f8f 100644
--- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
@@ -7,7 +7,7 @@
target triple = "x86_64-unknown-unknown"
declare void @bar1()
define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hip $hsp $ip $mxcsr $rflags $rip $riz $rsp $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $r11b $r11bh $r11d $r11w $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $tmm0_tmm1 $tmm2_tmm3 $tmm4_tmm5 $tmm6_tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hip $hsp $ip $mxcsr $rflags $rip $riz $rsp $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $r11b $r11bh $r11d $r11w $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh
call void @bar1()
call void @bar2()
ret void
@@ -15,7 +15,7 @@ define preserve_allcc void @foo()#0 {
declare void @bar2()
define preserve_nonecc void @foo2()#0 {
-; CHECK: foo2 Clobbered Registers: $ah $al $ax $ch $cl $cs $cx $df $dh $di $dih $dil $dl $ds $dx $eax $ecx $edi $edx $eflags $eip $eiz $es $esi $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hax $hcx $hdi $hdx $hip $hsi $hsp $ip $mxcsr $rax $rcx $rdi $rdx $rflags $rip $riz $rsi $rsp $si $sih $sil $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r8 $r9 $r10 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm0 $xmm1 $xmm2 $xmm3 $xmm4 $xmm5 $xmm6 $xmm7 $xmm8 $xmm9 $xmm10 $xmm11 $xmm12 $xmm13 $xmm14 $xmm15 $r8b $r9b $r10b $r11b $r8bh $r9bh $r10bh $r11bh $r8d $r9d $r10d $r11d $r8w $r9w $r10w $r11w $r8wh $r9wh $r10wh $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $tmm0_tmm1 $tmm2_tmm3 $tmm4_tmm5 $tmm6_tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh
+; CHECK: foo2 Clobbered Registers: $ah $al $ax $ch $cl $cs $cx $df $dh $di $dih $dil $dl $ds $dx $eax $ecx $edi $edx $eflags $eip $eiz $es $esi $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hax $hcx $hdi $hdx $hip $hsi $hsp $ip $mxcsr $rax $rcx $rdi $rdx $rflags $rip $riz $rsi $rsp $si $sih $sil $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r8 $r9 $r10 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm0 $xmm1 $xmm2 $xmm3 $xmm4 $xmm5 $xmm6 $xmm7 $xmm8 $xmm9 $xmm10 $xmm11 $xmm12 $xmm13 $xmm14 $xmm15 $r8b $r9b $r10b $r11b $r8bh $r9bh $r10bh $r11bh $r8d $r9d $r10d $r11d $r8w $r9w $r10w $r11w $r8wh $r9wh $r10wh $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh
call void @bar1()
call void @bar2()
ret void
diff --git a/llvm/test/CodeGen/X86/ldexp-avx512.ll b/llvm/test/CodeGen/X86/ldexp-avx512.ll
new file mode 100644
index 0000000..ea93a91
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ldexp-avx512.ll
@@ -0,0 +1,467 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512VL
+
+define half @test_half(half %x, i32 %exp) nounwind {
+; CHECK-LABEL: test_half:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+entry:
+ %r = tail call fast half @llvm.ldexp.f16.i32(half %x, i32 %exp)
+ ret half %r
+}
+declare half @llvm.ldexp.f16.i32(half, i32) memory(none)
+
+define float @test_float(float %x, i32 %exp) nounwind {
+; CHECK-LABEL: test_float:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: jmp ldexpf@PLT # TAILCALL
+entry:
+ %r = tail call fast float @ldexpf(float %x, i32 %exp)
+ ret float %r
+}
+declare float @ldexpf(float, i32) memory(none)
+
+define double @test_double(double %x, i32 %exp) nounwind {
+; CHECK-LABEL: test_double:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: jmp ldexp@PLT # TAILCALL
+entry:
+ %r = tail call fast double @ldexp(double %x, i32 %exp)
+ ret double %r
+}
+declare double @ldexp(double, i32) memory(none)
+
+define fp128 @testExpl(fp128 %x, i32 %exp) nounwind {
+; CHECK-LABEL: testExpl:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: jmp ldexpl@PLT # TAILCALL
+entry:
+ %r = tail call fast fp128 @ldexpl(fp128 %x, i32 %exp)
+ ret fp128 %r
+}
+declare fp128 @ldexpl(fp128, i32) memory(none)
+
+define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_4xfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm1, %edi
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: retq
+ %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %x, <4 x i32> %exp)
+ ret <4 x float> %r
+}
+declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
+
+define <2 x double> @test_ldexp_2xdouble(<2 x double> %x, <2 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_2xdouble:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm1, %edi
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: retq
+ %r = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %x, <2 x i32> %exp)
+ ret <2 x double> %r
+}
+declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>)
+
+define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_8xfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $120, %rsp
+; CHECK-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
+; CHECK-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm1, %edi
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $120, %rsp
+; CHECK-NEXT: retq
+ %r = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> %x, <8 x i32> %exp)
+ ret <8 x float> %r
+}
+declare <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float>, <8 x i32>)
+
+define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_4xdouble:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $88, %rsp
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vextractps $2, %xmm1, %edi
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $88, %rsp
+; CHECK-NEXT: retq
+ %r = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %x, <4 x i32> %exp)
+ ret <4 x double> %r
+}
+declare <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>)
+
+define <16 x float> @test_ldexp_16xfloat(<16 x float> %x, <16 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_16xfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $216, %rsp
+; CHECK-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vextracti32x4 $3, %zmm1, %xmm1
+; CHECK-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm1, %edi
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexpf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: vinsertf64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; CHECK-NEXT: addq $216, %rsp
+; CHECK-NEXT: retq
+ %r = call <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float> %x, <16 x i32> %exp)
+ ret <16 x float> %r
+}
+declare <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float>, <16 x i32>)
+
+define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwind {
+; CHECK-LABEL: test_ldexp_8xdouble:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $184, %rsp
+; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vextractps $2, %xmm1, %edi
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vmovd %xmm0, %edi
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq ldexp@PLT
+; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: vinsertf64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; CHECK-NEXT: addq $184, %rsp
+; CHECK-NEXT: retq
+ %r = call <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double> %x, <8 x i32> %exp)
+ ret <8 x double> %r
+}
+declare <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double>, <8 x i32>)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX512: {{.*}}
+; AVX512VL: {{.*}}
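For reference, ldexp(x, n) computes x * 2^n. The scalar tests above check that the calls lower directly to the libm routines (ldexpf, ldexp, ldexpl), while the vector tests check full scalarization: each lane is extracted into %edi and %xmm0, dispatched to ldexpf or ldexp, and the results are stitched back together with vinsertps/vunpcklpd/vinsertf128/vinsertf64x4. A rough C++ analogue of that scalarized lowering, offered only as an illustrative sketch (the function name and signature are assumptions, not taken from the patch):

    #include <cmath>

    // Per-lane scalarization of llvm.ldexp.v4f32.v4i32, mirroring the
    // extract / libcall / re-insert pattern in the CHECK lines above.
    void ldexp_v4f32(float x[4], const int e[4]) {
      for (int i = 0; i < 4; ++i)
        x[i] = std::ldexp(x[i], e[i]);  // ldexp(x, n) == x * 2^n
    }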
diff --git a/llvm/test/CodeGen/X86/pr165755.ll b/llvm/test/CodeGen/X86/pr165755.ll
new file mode 100644
index 0000000..3ab484f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr165755.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64
+
+define i32 @PR165755(ptr %p0) {
+; X86-LABEL: PR165755:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %eax
+; X86-NEXT: movb $0, (%ecx)
+; X86-NEXT: retl
+;
+; X64-LABEL: PR165755:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movb $0, (%rdi)
+; X64-NEXT: retq
+ %ld64 = load i64, ptr %p0, align 8
+ store i8 0, ptr %p0, align 1
+ %ld32 = load i32, ptr %p0, align 8
+ %mask = and i32 %ld32, 32
+ %zext = zext i32 %mask to i64
+ %srl = lshr i64 %ld64, %zext
+ %res = trunc i64 %srl to i32
+ ret i32 %res
+}
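For reference, the expected output follows from the bit arithmetic in the IR: the i8 store only clears bits 0-7 of the memory at %p0, so bit 5 of the reloaded i32 is zero, %mask = %ld32 & 32 is always 0, the shift amount is 0, and %res is simply the low 32 bits of the 64-bit value loaded before the store. Hence both CHECK blocks read the 32-bit value first (movl) and only then store the zero byte (movb $0).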
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index 9816fa7..044327d 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -875,28 +875,12 @@ define i1 @mask_v8i32(<8 x i32> %a0) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
-; AVX1-LABEL: mask_v8i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
-; AVX1-NEXT: sete %al
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: mask_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
-; AVX2-NEXT: vptest %ymm1, %ymm0
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: mask_v8i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
-; AVX512-NEXT: vptest %ymm1, %ymm0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: mask_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vtestps %ymm0, %ymm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
%2 = and i32 %1, 2147483648
%3 = icmp eq i32 %2, 0
@@ -965,28 +949,12 @@ define i1 @signtest_v8i32(<8 x i32> %a0) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
-; AVX1-LABEL: signtest_v8i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
-; AVX1-NEXT: sete %al
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: signtest_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
-; AVX2-NEXT: vptest %ymm1, %ymm0
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: signtest_v8i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
-; AVX512-NEXT: vptest %ymm1, %ymm0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: signtest_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vtestps %ymm0, %ymm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
%2 = icmp sgt i32 %1, -1
ret i1 %2
@@ -1010,28 +978,12 @@ define i1 @signtest_v4i64(<4 x i64> %a0) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
-; AVX1-LABEL: signtest_v4i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
-; AVX1-NEXT: sete %al
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: signtest_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vptest %ymm1, %ymm0
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: signtest_v4i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX512-NEXT: vptest %ymm1, %ymm0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: signtest_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vtestpd %ymm0, %ymm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%1 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a0)
%2 = icmp sgt i64 %1, -1
ret i1 %2
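For reference, the merged AVX pattern relies on the semantics of vtestps/vtestpd: with the same register as both operands, ZF is set exactly when no element has its sign bit set. That matches the reductions being tested here; for v8i32, (reduce.or(a) & 0x80000000) == 0 and reduce.or(a) > -1 (signed) both hold iff no element has bit 31 set. The removed AVX1/AVX2/AVX512 sequences expressed the same check as vptest against a sign-bit mask constant (0x8000000080000000 per i64 lane, or 0x8000000000000000 for the v4i64 case).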
diff --git a/llvm/test/DebugInfo/Generic/objc-property.ll b/llvm/test/DebugInfo/Generic/objc-property.ll
index 007d1fe..1ee7929 100644
--- a/llvm/test/DebugInfo/Generic/objc-property.ll
+++ b/llvm/test/DebugInfo/Generic/objc-property.ll
@@ -5,33 +5,33 @@
; CHECK: DW_TAG_structure_type
; CHECK: DW_AT_name ("Foo")
;
-; CHECK: DW_TAG_APPLE_property
+; CHECK: 0x[[AUTO_SYNTH:[0-9a-f]+]]: DW_TAG_APPLE_property
; CHECK: DW_AT_APPLE_property_name ("autoSynthProp")
; CHECK: DW_AT_APPLE_property_attribute
; CHECK-SAME: DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite,
; CHECK-SAME: DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained
;
-; CHECK: DW_TAG_APPLE_property
+; CHECK: 0x[[SYNTH:[0-9a-f]+]]: DW_TAG_APPLE_property
; CHECK: DW_AT_APPLE_property_name ("synthProp")
; CHECK: DW_AT_APPLE_property_attribute
; CHECK-SAME: DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite,
; CHECK-SAME: DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained
;
-; CHECK: DW_TAG_APPLE_property
+; CHECK: 0x[[GET:[0-9a-f]+]]: DW_TAG_APPLE_property
; CHECK: DW_AT_APPLE_property_name ("customGetterProp")
; CHECK: DW_AT_APPLE_property_getter ("customGetter")
; CHECK: DW_AT_APPLE_property_attribute
; CHECK-SAME: DW_APPLE_PROPERTY_getter, DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite,
; CHECK-SAME: DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained
;
-; CHECK: DW_TAG_APPLE_property
+; CHECK: 0x[[SET:[0-9a-f]+]]: DW_TAG_APPLE_property
; CHECK: DW_AT_APPLE_property_name ("customSetterProp")
; CHECK: DW_AT_APPLE_property_setter ("customSetter:")
; CHECK: DW_AT_APPLE_property_attribute
; CHECK-SAME: DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite,
; CHECK-SAME: DW_APPLE_PROPERTY_setter, DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained
;
-; CHECK: DW_TAG_APPLE_property
+; CHECK: 0x[[ACCESSORS:[0-9a-f]+]]: DW_TAG_APPLE_property
; CHECK: DW_AT_APPLE_property_name ("customAccessorsProp")
; CHECK: DW_AT_APPLE_property_getter ("customGetter")
; CHECK: DW_AT_APPLE_property_setter ("customSetter:")
@@ -39,15 +39,21 @@
; CHECK-SAME: DW_APPLE_PROPERTY_getter, DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite,
; CHECK-SAME: DW_APPLE_PROPERTY_setter, DW_APPLE_PROPERTY_atomic, DW_APPLE_PROPERTY_unsafe_unretained
;
-; FIXME: missing link between DW_TAG_member and the associated DW_TAG_APPLE_property
; CHECK: DW_TAG_member
-; CHECK-NOT: DW_AT_APPLE_property
+; CHECK: DW_AT_name ("someBackingIvar")
+; CHECK: DW_AT_APPLE_property (0x[[SYNTH]] "synthProp")
+;
; CHECK: DW_TAG_member
-; CHECK-NOT: DW_AT_APPLE_property
+; CHECK: DW_AT_name ("_autoSynthProp")
+; CHECK: DW_AT_APPLE_property (0x[[AUTO_SYNTH]] "autoSynthProp")
+;
; CHECK: DW_TAG_member
-; CHECK-NOT: DW_AT_APPLE_property
+; CHECK: DW_AT_name ("_customGetterProp")
+; CHECK: DW_AT_APPLE_property (0x[[GET]] "customGetterProp")
+;
; CHECK: DW_TAG_member
-; CHECK-NOT: DW_AT_APPLE_property
+; CHECK: DW_AT_name ("_customSetterProp")
+; CHECK: DW_AT_APPLE_property (0x[[SET]] "customSetterProp")
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test b/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test
new file mode 100755
index 0000000..aa3f6dc
--- /dev/null
+++ b/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test
@@ -0,0 +1,13 @@
+; Test that the native PDB reader isn't crashed by an index value bigger than
+; the number of types in the TPI or IPI stream.
+; RUN: llvm-pdbutil dump %p/../Inputs/empty.pdb --type-index=20000000\
+; RUN: | FileCheck -check-prefixes=TYPES,NOT_FOUND %s
+; RUN: llvm-pdbutil dump %p/../Inputs/empty.pdb --id-index=20000000\
+; RUN: | FileCheck -check-prefixes=IDS,NOT_FOUND %s
+
+TYPES: Types (TPI Stream)
+IDS: Types (IPI Stream)
+NOT_FOUND:============================================================
+NOT_FOUND: Showing 1 records.
+NOT_FOUND: Type 0x1312D00 doesn't exist in TPI stream
+
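For reference, the out-of-range index 20000000 passed on the RUN lines is 0x1312D00 in hex, which is exactly the value the NOT_FOUND lines expect in the "doesn't exist" message instead of a crash.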
diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll
new file mode 100644
index 0000000..1d11856
--- /dev/null
+++ b/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; Test basic outlined type sanitizer instrumentation.
+;
+; RUN: opt -passes='tysan' -tysan-outline-instrumentation -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;.
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @tysan.module_ctor, ptr null }]
+; CHECK: @__tysan_v1_Simple_20C_2b_2b_20TBAA = linkonce_odr constant { i64, i64, [16 x i8] } { i64 2, i64 0, [16 x i8] c"Simple C++ TBAA\00" }, comdat
+; CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat
+; CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat
+; CHECK: @__tysan_v1_int_o_0 = linkonce_odr constant { i64, ptr, ptr, i64 } { i64 1, ptr @__tysan_v1_int, ptr @__tysan_v1_int, i64 0 }, comdat
+; CHECK: @__tysan_shadow_memory_address = external global i64
+; CHECK: @__tysan_app_memory_mask = external global i64
+; CHECK: @__tysan_v1___ZTS1x = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [7 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_int, i64 4, [7 x i8] c"_ZTS1x\00" }, comdat
+; CHECK: @__tysan_v1___ZTS1v = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, ptr, i64, [7 x i8] } { i64 2, i64 3, ptr @__tysan_v1_int, i64 8, ptr @__tysan_v1_int, i64 12, ptr @__tysan_v1___ZTS1x, i64 16, [7 x i8] c"_ZTS1v\00" }, comdat
+; CHECK: @__tysan_v1___ZTS1v_o_12 = linkonce_odr constant { i64, ptr, ptr, i64 } { i64 1, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1_int, i64 12 }, comdat
+; CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_int_o_0, ptr @__tysan_v1___ZTS1x, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1___ZTS1v_o_12], section "llvm.metadata"
+;.
+define i32 @test_load(ptr %a) sanitize_type {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr [[A:%.*]], ptr @__tysan_v1_int_o_0, i1 true, i64 4, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+entry:
+ %tmp1 = load i32, ptr %a, align 4, !tbaa !3
+ ret i32 %tmp1
+}
+
+define void @test_store(ptr %a) sanitize_type {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr [[A:%.*]], ptr @__tysan_v1___ZTS1v_o_12, i1 true, i64 4, i32 2)
+; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+
+entry:
+ store i32 42, ptr %a, align 4, !tbaa !6
+ ret void
+}
+
+!0 = !{!"Simple C++ TBAA"}
+!1 = !{!"omnipotent char", !0, i64 0}
+!2 = !{!"int", !1, i64 0}
+!3 = !{!2, !2, i64 0}
+!4 = !{!"_ZTS1x", !2, i64 0, !2, i64 4}
+!5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16}
+!6 = !{!5, !2, i64 12}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META1]], i64 12}
+; CHECK: [[META5]] = !{!"_ZTS1v", [[META1]], i64 8, [[META1]], i64 12, [[META6:![0-9]+]], i64 16}
+; CHECK: [[META6]] = !{!"_ZTS1x", [[META1]], i64 0, [[META1]], i64 4}
+;.
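The struct-path TBAA in !0-!6 above is what produces the __tysan_v1_* descriptors in the global checks, and -tysan-outline-instrumentation is what replaces inline shadow-memory checks with the @__tysan_instrument_with_shadow_update calls visible in the CHECK bodies. As a purely illustrative sketch (the source below is an assumption reconstructed from the TBAA offsets, not part of the patch), C++ input of roughly this shape would yield _ZTS1x/_ZTS1v descriptors with the offsets seen in !4 and !5:

    // Hypothetical reconstruction from the TBAA offsets only.
    struct x { int a; int b; };        // ints at offsets 0 and 4  -> _ZTS1x
    struct v {
      virtual ~v();                    // vptr assumed to occupy bytes 0-7
      int n1;                          // offset 8
      int n2;                          // offset 12 (the field written in test_store)
      x   inner;                       // offset 16
    };
    void write(v *p) { p->n2 = 42; }   // corresponds to: store i32 42, ... !tbaa !6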
diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic_verify_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/basic_verify_outlined.ll
new file mode 100644
index 0000000..187a41e
--- /dev/null
+++ b/llvm/test/Instrumentation/TypeSanitizer/basic_verify_outlined.ll
@@ -0,0 +1,736 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; Test basic outlined type sanitizer instrumentation with verification enabled.
+;
+; RUN: opt -passes='tysan' -tysan-outline-instrumentation -tysan-verify-outlined-instrumentation -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;.
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @tysan.module_ctor, ptr null }]
+; CHECK: @__tysan_v1_Simple_20C_2b_2b_20TBAA = linkonce_odr constant { i64, i64, [16 x i8] } { i64 2, i64 0, [16 x i8] c"Simple C++ TBAA\00" }, comdat
+; CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat
+; CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat
+; CHECK: @__tysan_v1_int_o_0 = linkonce_odr constant { i64, ptr, ptr, i64 } { i64 1, ptr @__tysan_v1_int, ptr @__tysan_v1_int, i64 0 }, comdat
+; CHECK: @__tysan_shadow_memory_address = external global i64
+; CHECK: @__tysan_app_memory_mask = external global i64
+; CHECK: @__tysan_v1___ZTS1x = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [7 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_int, i64 4, [7 x i8] c"_ZTS1x\00" }, comdat
+; CHECK: @__tysan_v1___ZTS1v = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, ptr, i64, [7 x i8] } { i64 2, i64 3, ptr @__tysan_v1_int, i64 8, ptr @__tysan_v1_int, i64 12, ptr @__tysan_v1___ZTS1x, i64 16, [7 x i8] c"_ZTS1v\00" }, comdat
+; CHECK: @__tysan_v1___ZTS1v_o_12 = linkonce_odr constant { i64, ptr, ptr, i64 } { i64 1, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1_int, i64 12 }, comdat
+; CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_int_o_0, ptr @__tysan_v1___ZTS1x, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1___ZTS1v_o_12], section "llvm.metadata"
+;.
+define i32 @test_load(ptr %a) sanitize_type {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[APP_MEM_MASK2:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: [[SHADOW_BASE1:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 ptrtoint (ptr @__tysan_app_memory_mask to i64), [[APP_MEM_MASK2]]
+; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3
+; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE1]]
+; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
+; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
+; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], null
+; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP42:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: 0:
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne ptr [[TMP5]], null
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[SHADOW_PTR_INT]], 16
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne ptr [[TMP10]], null
+; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP7]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[SHADOW_PTR_INT]], 32
+; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
+; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
+; CHECK-NEXT: [[TMP21:%.*]] = icmp ne ptr [[TMP20]], null
+; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP17]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 40
+; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
+; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
+; CHECK-NEXT: [[TMP26:%.*]] = icmp ne ptr [[TMP25]], null
+; CHECK-NEXT: [[TMP27:%.*]] = or i1 [[TMP22]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[SHADOW_PTR_INT]], 48
+; CHECK-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr
+; CHECK-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8
+; CHECK-NEXT: [[TMP31:%.*]] = icmp ne ptr [[TMP30]], null
+; CHECK-NEXT: [[TMP32:%.*]] = or i1 [[TMP27]], [[TMP31]]
+; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[SHADOW_PTR_INT]], 56
+; CHECK-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr
+; CHECK-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP34]], align 8
+; CHECK-NEXT: [[TMP36:%.*]] = icmp ne ptr [[TMP35]], null
+; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP32]], [[TMP36]]
+; CHECK-NEXT: br i1 [[TMP37]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF0]]
+; CHECK: 38:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP39]]
+; CHECK: 39:
+; CHECK-NEXT: store ptr null, ptr [[SHADOW_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 16
+; CHECK-NEXT: [[SHADOW_BYTE_2_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
+; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_4_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 32
+; CHECK-NEXT: [[SHADOW_BYTE_4_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_4_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -4 to ptr), ptr [[SHADOW_BYTE_4_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_5_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 40
+; CHECK-NEXT: [[SHADOW_BYTE_5_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_5_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -5 to ptr), ptr [[SHADOW_BYTE_5_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_6_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 48
+; CHECK-NEXT: [[SHADOW_BYTE_6_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_6_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -6 to ptr), ptr [[SHADOW_BYTE_6_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 56
+; CHECK-NEXT: [[SHADOW_BYTE_7_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR]], align 8
+; CHECK-NEXT: br label [[TMP41:%.*]]
+; CHECK: 40:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP41]]
+; CHECK: 41:
+; CHECK-NEXT: br label [[TMP87:%.*]]
+; CHECK: 42:
+; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
+; CHECK-NEXT: [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to ptr
+; CHECK-NEXT: [[TMP45:%.*]] = load ptr, ptr [[TMP44]], align 8
+; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[TMP45]] to i64
+; CHECK-NEXT: [[TMP47:%.*]] = icmp sge i64 [[TMP46]], 0
+; CHECK-NEXT: [[TMP48:%.*]] = or i1 false, [[TMP47]]
+; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[SHADOW_PTR_INT]], 16
+; CHECK-NEXT: [[TMP50:%.*]] = inttoptr i64 [[TMP49]] to ptr
+; CHECK-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
+; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr [[TMP51]] to i64
+; CHECK-NEXT: [[TMP53:%.*]] = icmp sge i64 [[TMP52]], 0
+; CHECK-NEXT: [[TMP54:%.*]] = or i1 [[TMP48]], [[TMP53]]
+; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
+; CHECK-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr
+; CHECK-NEXT: [[TMP57:%.*]] = load ptr, ptr [[TMP56]], align 8
+; CHECK-NEXT: [[TMP58:%.*]] = ptrtoint ptr [[TMP57]] to i64
+; CHECK-NEXT: [[TMP59:%.*]] = icmp sge i64 [[TMP58]], 0
+; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[TMP54]], [[TMP59]]
+; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[SHADOW_PTR_INT]], 32
+; CHECK-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr
+; CHECK-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP62]], align 8
+; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr [[TMP63]] to i64
+; CHECK-NEXT: [[TMP65:%.*]] = icmp sge i64 [[TMP64]], 0
+; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[TMP60]], [[TMP65]]
+; CHECK-NEXT: [[TMP67:%.*]] = add i64 [[SHADOW_PTR_INT]], 40
+; CHECK-NEXT: [[TMP68:%.*]] = inttoptr i64 [[TMP67]] to ptr
+; CHECK-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8
+; CHECK-NEXT: [[TMP70:%.*]] = ptrtoint ptr [[TMP69]] to i64
+; CHECK-NEXT: [[TMP71:%.*]] = icmp sge i64 [[TMP70]], 0
+; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[TMP66]], [[TMP71]]
+; CHECK-NEXT: [[TMP73:%.*]] = add i64 [[SHADOW_PTR_INT]], 48
+; CHECK-NEXT: [[TMP74:%.*]] = inttoptr i64 [[TMP73]] to ptr
+; CHECK-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8
+; CHECK-NEXT: [[TMP76:%.*]] = ptrtoint ptr [[TMP75]] to i64
+; CHECK-NEXT: [[TMP77:%.*]] = icmp sge i64 [[TMP76]], 0
+; CHECK-NEXT: [[TMP78:%.*]] = or i1 [[TMP72]], [[TMP77]]
+; CHECK-NEXT: [[TMP79:%.*]] = add i64 [[SHADOW_PTR_INT]], 56
+; CHECK-NEXT: [[TMP80:%.*]] = inttoptr i64 [[TMP79]] to ptr
+; CHECK-NEXT: [[TMP81:%.*]] = load ptr, ptr [[TMP80]], align 8
+; CHECK-NEXT: [[TMP82:%.*]] = ptrtoint ptr [[TMP81]] to i64
+; CHECK-NEXT: [[TMP83:%.*]] = icmp sge i64 [[TMP82]], 0
+; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[TMP78]], [[TMP83]]
+; CHECK-NEXT: br i1 [[TMP84]], label [[TMP85:%.*]], label [[TMP86:%.*]], !prof [[PROF0]]
+; CHECK: 85:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP86]]
+; CHECK: 86:
+; CHECK-NEXT: br label [[TMP87]]
+; CHECK: 87:
+; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: [[APP_PTR_MASKED3:%.*]] = and i64 ptrtoint (ptr @__tysan_shadow_memory_address to i64), [[APP_MEM_MASK2]]
+; CHECK-NEXT: [[APP_PTR_SHIFTED4:%.*]] = shl i64 [[APP_PTR_MASKED3]], 3
+; CHECK-NEXT: [[SHADOW_PTR_INT5:%.*]] = add i64 [[APP_PTR_SHIFTED4]], [[SHADOW_BASE1]]
+; CHECK-NEXT: [[SHADOW_PTR6:%.*]] = inttoptr i64 [[SHADOW_PTR_INT5]] to ptr
+; CHECK-NEXT: [[SHADOW_DESC7:%.*]] = load ptr, ptr [[SHADOW_PTR6]], align 8
+; CHECK-NEXT: [[BAD_DESC8:%.*]] = icmp ne ptr [[SHADOW_DESC7]], null
+; CHECK-NEXT: br i1 [[BAD_DESC8]], label [[TMP88:%.*]], label [[TMP130:%.*]], !prof [[PROF0]]
+; CHECK: 88:
+; CHECK-NEXT: [[TMP89:%.*]] = icmp eq ptr [[SHADOW_DESC7]], null
+; CHECK-NEXT: br i1 [[TMP89]], label [[TMP90:%.*]], label [[TMP128:%.*]]
+; CHECK: 90:
+; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8
+; CHECK-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP91]] to ptr
+; CHECK-NEXT: [[TMP93:%.*]] = load ptr, ptr [[TMP92]], align 8
+; CHECK-NEXT: [[TMP94:%.*]] = icmp ne ptr [[TMP93]], null
+; CHECK-NEXT: [[TMP95:%.*]] = or i1 false, [[TMP94]]
+; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16
+; CHECK-NEXT: [[TMP97:%.*]] = inttoptr i64 [[TMP96]] to ptr
+; CHECK-NEXT: [[TMP98:%.*]] = load ptr, ptr [[TMP97]], align 8
+; CHECK-NEXT: [[TMP99:%.*]] = icmp ne ptr [[TMP98]], null
+; CHECK-NEXT: [[TMP100:%.*]] = or i1 [[TMP95]], [[TMP99]]
+; CHECK-NEXT: [[TMP101:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24
+; CHECK-NEXT: [[TMP102:%.*]] = inttoptr i64 [[TMP101]] to ptr
+; CHECK-NEXT: [[TMP103:%.*]] = load ptr, ptr [[TMP102]], align 8
+; CHECK-NEXT: [[TMP104:%.*]] = icmp ne ptr [[TMP103]], null
+; CHECK-NEXT: [[TMP105:%.*]] = or i1 [[TMP100]], [[TMP104]]
+; CHECK-NEXT: [[TMP106:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32
+; CHECK-NEXT: [[TMP107:%.*]] = inttoptr i64 [[TMP106]] to ptr
+; CHECK-NEXT: [[TMP108:%.*]] = load ptr, ptr [[TMP107]], align 8
+; CHECK-NEXT: [[TMP109:%.*]] = icmp ne ptr [[TMP108]], null
+; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[TMP105]], [[TMP109]]
+; CHECK-NEXT: [[TMP111:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40
+; CHECK-NEXT: [[TMP112:%.*]] = inttoptr i64 [[TMP111]] to ptr
+; CHECK-NEXT: [[TMP113:%.*]] = load ptr, ptr [[TMP112]], align 8
+; CHECK-NEXT: [[TMP114:%.*]] = icmp ne ptr [[TMP113]], null
+; CHECK-NEXT: [[TMP115:%.*]] = or i1 [[TMP110]], [[TMP114]]
+; CHECK-NEXT: [[TMP116:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48
+; CHECK-NEXT: [[TMP117:%.*]] = inttoptr i64 [[TMP116]] to ptr
+; CHECK-NEXT: [[TMP118:%.*]] = load ptr, ptr [[TMP117]], align 8
+; CHECK-NEXT: [[TMP119:%.*]] = icmp ne ptr [[TMP118]], null
+; CHECK-NEXT: [[TMP120:%.*]] = or i1 [[TMP115]], [[TMP119]]
+; CHECK-NEXT: [[TMP121:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56
+; CHECK-NEXT: [[TMP122:%.*]] = inttoptr i64 [[TMP121]] to ptr
+; CHECK-NEXT: [[TMP123:%.*]] = load ptr, ptr [[TMP122]], align 8
+; CHECK-NEXT: [[TMP124:%.*]] = icmp ne ptr [[TMP123]], null
+; CHECK-NEXT: [[TMP125:%.*]] = or i1 [[TMP120]], [[TMP124]]
+; CHECK-NEXT: br i1 [[TMP125]], label [[TMP126:%.*]], label [[TMP127:%.*]], !prof [[PROF0]]
+; CHECK: 126:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP127]]
+; CHECK: 127:
+; CHECK-NEXT: store ptr null, ptr [[SHADOW_PTR6]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET9:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_PTR10:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET9]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR10]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET11:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16
+; CHECK-NEXT: [[SHADOW_BYTE_2_PTR12:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET11]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR12]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET13:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24
+; CHECK-NEXT: [[SHADOW_BYTE_3_PTR14:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET13]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR14]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_4_OFFSET15:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32
+; CHECK-NEXT: [[SHADOW_BYTE_4_PTR16:%.*]] = inttoptr i64 [[SHADOW_BYTE_4_OFFSET15]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -4 to ptr), ptr [[SHADOW_BYTE_4_PTR16]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_5_OFFSET17:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40
+; CHECK-NEXT: [[SHADOW_BYTE_5_PTR18:%.*]] = inttoptr i64 [[SHADOW_BYTE_5_OFFSET17]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -5 to ptr), ptr [[SHADOW_BYTE_5_PTR18]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_6_OFFSET19:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48
+; CHECK-NEXT: [[SHADOW_BYTE_6_PTR20:%.*]] = inttoptr i64 [[SHADOW_BYTE_6_OFFSET19]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -6 to ptr), ptr [[SHADOW_BYTE_6_PTR20]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET21:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56
+; CHECK-NEXT: [[SHADOW_BYTE_7_PTR22:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET21]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR22]], align 8
+; CHECK-NEXT: br label [[TMP129:%.*]]
+; CHECK: 128:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP129]]
+; CHECK: 129:
+; CHECK-NEXT: br label [[TMP175:%.*]]
+; CHECK: 130:
+; CHECK-NEXT: [[TMP131:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8
+; CHECK-NEXT: [[TMP132:%.*]] = inttoptr i64 [[TMP131]] to ptr
+; CHECK-NEXT: [[TMP133:%.*]] = load ptr, ptr [[TMP132]], align 8
+; CHECK-NEXT: [[TMP134:%.*]] = ptrtoint ptr [[TMP133]] to i64
+; CHECK-NEXT: [[TMP135:%.*]] = icmp sge i64 [[TMP134]], 0
+; CHECK-NEXT: [[TMP136:%.*]] = or i1 false, [[TMP135]]
+; CHECK-NEXT: [[TMP137:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16
+; CHECK-NEXT: [[TMP138:%.*]] = inttoptr i64 [[TMP137]] to ptr
+; CHECK-NEXT: [[TMP139:%.*]] = load ptr, ptr [[TMP138]], align 8
+; CHECK-NEXT: [[TMP140:%.*]] = ptrtoint ptr [[TMP139]] to i64
+; CHECK-NEXT: [[TMP141:%.*]] = icmp sge i64 [[TMP140]], 0
+; CHECK-NEXT: [[TMP142:%.*]] = or i1 [[TMP136]], [[TMP141]]
+; CHECK-NEXT: [[TMP143:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24
+; CHECK-NEXT: [[TMP144:%.*]] = inttoptr i64 [[TMP143]] to ptr
+; CHECK-NEXT: [[TMP145:%.*]] = load ptr, ptr [[TMP144]], align 8
+; CHECK-NEXT: [[TMP146:%.*]] = ptrtoint ptr [[TMP145]] to i64
+; CHECK-NEXT: [[TMP147:%.*]] = icmp sge i64 [[TMP146]], 0
+; CHECK-NEXT: [[TMP148:%.*]] = or i1 [[TMP142]], [[TMP147]]
+; CHECK-NEXT: [[TMP149:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32
+; CHECK-NEXT: [[TMP150:%.*]] = inttoptr i64 [[TMP149]] to ptr
+; CHECK-NEXT: [[TMP151:%.*]] = load ptr, ptr [[TMP150]], align 8
+; CHECK-NEXT: [[TMP152:%.*]] = ptrtoint ptr [[TMP151]] to i64
+; CHECK-NEXT: [[TMP153:%.*]] = icmp sge i64 [[TMP152]], 0
+; CHECK-NEXT: [[TMP154:%.*]] = or i1 [[TMP148]], [[TMP153]]
+; CHECK-NEXT: [[TMP155:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40
+; CHECK-NEXT: [[TMP156:%.*]] = inttoptr i64 [[TMP155]] to ptr
+; CHECK-NEXT: [[TMP157:%.*]] = load ptr, ptr [[TMP156]], align 8
+; CHECK-NEXT: [[TMP158:%.*]] = ptrtoint ptr [[TMP157]] to i64
+; CHECK-NEXT: [[TMP159:%.*]] = icmp sge i64 [[TMP158]], 0
+; CHECK-NEXT: [[TMP160:%.*]] = or i1 [[TMP154]], [[TMP159]]
+; CHECK-NEXT: [[TMP161:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48
+; CHECK-NEXT: [[TMP162:%.*]] = inttoptr i64 [[TMP161]] to ptr
+; CHECK-NEXT: [[TMP163:%.*]] = load ptr, ptr [[TMP162]], align 8
+; CHECK-NEXT: [[TMP164:%.*]] = ptrtoint ptr [[TMP163]] to i64
+; CHECK-NEXT: [[TMP165:%.*]] = icmp sge i64 [[TMP164]], 0
+; CHECK-NEXT: [[TMP166:%.*]] = or i1 [[TMP160]], [[TMP165]]
+; CHECK-NEXT: [[TMP167:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56
+; CHECK-NEXT: [[TMP168:%.*]] = inttoptr i64 [[TMP167]] to ptr
+; CHECK-NEXT: [[TMP169:%.*]] = load ptr, ptr [[TMP168]], align 8
+; CHECK-NEXT: [[TMP170:%.*]] = ptrtoint ptr [[TMP169]] to i64
+; CHECK-NEXT: [[TMP171:%.*]] = icmp sge i64 [[TMP170]], 0
+; CHECK-NEXT: [[TMP172:%.*]] = or i1 [[TMP166]], [[TMP171]]
+; CHECK-NEXT: br i1 [[TMP172]], label [[TMP173:%.*]], label [[TMP174:%.*]], !prof [[PROF0]]
+; CHECK: 173:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP174]]
+; CHECK: 174:
+; CHECK-NEXT: br label [[TMP175]]
+; CHECK: 175:
+; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr [[A:%.*]], ptr @__tysan_v1_int_o_0, i1 true, i64 4, i32 1)
+; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[APP_PTR_MASKED23:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK2]]
+; CHECK-NEXT: [[APP_PTR_SHIFTED24:%.*]] = shl i64 [[APP_PTR_MASKED23]], 3
+; CHECK-NEXT: [[SHADOW_PTR_INT25:%.*]] = add i64 [[APP_PTR_SHIFTED24]], [[SHADOW_BASE1]]
+; CHECK-NEXT: [[SHADOW_PTR26:%.*]] = inttoptr i64 [[SHADOW_PTR_INT25]] to ptr
+; CHECK-NEXT: [[SHADOW_DESC27:%.*]] = load ptr, ptr [[SHADOW_PTR26]], align 8
+; CHECK-NEXT: [[BAD_DESC28:%.*]] = icmp ne ptr [[SHADOW_DESC27]], @__tysan_v1_int_o_0
+; CHECK-NEXT: br i1 [[BAD_DESC28]], label [[TMP176:%.*]], label [[TMP198:%.*]], !prof [[PROF0]]
+; CHECK: 176:
+; CHECK-NEXT: [[TMP177:%.*]] = icmp eq ptr [[SHADOW_DESC27]], null
+; CHECK-NEXT: br i1 [[TMP177]], label [[TMP178:%.*]], label [[TMP196:%.*]]
+; CHECK: 178:
+; CHECK-NEXT: [[TMP179:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8
+; CHECK-NEXT: [[TMP180:%.*]] = inttoptr i64 [[TMP179]] to ptr
+; CHECK-NEXT: [[TMP181:%.*]] = load ptr, ptr [[TMP180]], align 8
+; CHECK-NEXT: [[TMP182:%.*]] = icmp ne ptr [[TMP181]], null
+; CHECK-NEXT: [[TMP183:%.*]] = or i1 false, [[TMP182]]
+; CHECK-NEXT: [[TMP184:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16
+; CHECK-NEXT: [[TMP185:%.*]] = inttoptr i64 [[TMP184]] to ptr
+; CHECK-NEXT: [[TMP186:%.*]] = load ptr, ptr [[TMP185]], align 8
+; CHECK-NEXT: [[TMP187:%.*]] = icmp ne ptr [[TMP186]], null
+; CHECK-NEXT: [[TMP188:%.*]] = or i1 [[TMP183]], [[TMP187]]
+; CHECK-NEXT: [[TMP189:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24
+; CHECK-NEXT: [[TMP190:%.*]] = inttoptr i64 [[TMP189]] to ptr
+; CHECK-NEXT: [[TMP191:%.*]] = load ptr, ptr [[TMP190]], align 8
+; CHECK-NEXT: [[TMP192:%.*]] = icmp ne ptr [[TMP191]], null
+; CHECK-NEXT: [[TMP193:%.*]] = or i1 [[TMP188]], [[TMP192]]
+; CHECK-NEXT: br i1 [[TMP193]], label [[TMP194:%.*]], label [[TMP195:%.*]], !prof [[PROF0]]
+; CHECK: 194:
+; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1)
+; CHECK-NEXT: br label [[TMP195]]
+; CHECK: 195:
+; CHECK-NEXT: store ptr @__tysan_v1_int_o_0, ptr [[SHADOW_PTR26]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET29:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_PTR30:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET29]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR30]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET31:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16
+; CHECK-NEXT: [[SHADOW_BYTE_2_PTR32:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET31]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR32]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET33:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24
+; CHECK-NEXT: [[SHADOW_BYTE_3_PTR34:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET33]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR34]], align 8
+; CHECK-NEXT: br label [[TMP197:%.*]]
+; CHECK: 196:
+; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1)
+; CHECK-NEXT: br label [[TMP197]]
+; CHECK: 197:
+; CHECK-NEXT: br label [[TMP219:%.*]]
+; CHECK: 198:
+; CHECK-NEXT: [[TMP199:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8
+; CHECK-NEXT: [[TMP200:%.*]] = inttoptr i64 [[TMP199]] to ptr
+; CHECK-NEXT: [[TMP201:%.*]] = load ptr, ptr [[TMP200]], align 8
+; CHECK-NEXT: [[TMP202:%.*]] = ptrtoint ptr [[TMP201]] to i64
+; CHECK-NEXT: [[TMP203:%.*]] = icmp sge i64 [[TMP202]], 0
+; CHECK-NEXT: [[TMP204:%.*]] = or i1 false, [[TMP203]]
+; CHECK-NEXT: [[TMP205:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16
+; CHECK-NEXT: [[TMP206:%.*]] = inttoptr i64 [[TMP205]] to ptr
+; CHECK-NEXT: [[TMP207:%.*]] = load ptr, ptr [[TMP206]], align 8
+; CHECK-NEXT: [[TMP208:%.*]] = ptrtoint ptr [[TMP207]] to i64
+; CHECK-NEXT: [[TMP209:%.*]] = icmp sge i64 [[TMP208]], 0
+; CHECK-NEXT: [[TMP210:%.*]] = or i1 [[TMP204]], [[TMP209]]
+; CHECK-NEXT: [[TMP211:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24
+; CHECK-NEXT: [[TMP212:%.*]] = inttoptr i64 [[TMP211]] to ptr
+; CHECK-NEXT: [[TMP213:%.*]] = load ptr, ptr [[TMP212]], align 8
+; CHECK-NEXT: [[TMP214:%.*]] = ptrtoint ptr [[TMP213]] to i64
+; CHECK-NEXT: [[TMP215:%.*]] = icmp sge i64 [[TMP214]], 0
+; CHECK-NEXT: [[TMP216:%.*]] = or i1 [[TMP210]], [[TMP215]]
+; CHECK-NEXT: br i1 [[TMP216]], label [[TMP217:%.*]], label [[TMP218:%.*]], !prof [[PROF0]]
+; CHECK: 217:
+; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1)
+; CHECK-NEXT: br label [[TMP218]]
+; CHECK: 218:
+; CHECK-NEXT: br label [[TMP219]]
+; CHECK: 219:
+; CHECK-NEXT: [[WAA:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT: ret i32 [[WAA]]
+;
+entry:
+ %WAA = load i32, ptr %a, align 4, !tbaa !3
+ ret i32 %WAA
+}
+
+define void @test_store(ptr %a) sanitize_type {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[APP_MEM_MASK2:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: [[SHADOW_BASE1:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 ptrtoint (ptr @__tysan_app_memory_mask to i64), [[APP_MEM_MASK2]]
+; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3
+; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE1]]
+; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr
+; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8
+; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], null
+; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP42:%.*]], !prof [[PROF0]]
+; CHECK: 0:
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne ptr [[TMP5]], null
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[SHADOW_PTR_INT]], 16
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne ptr [[TMP10]], null
+; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP7]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[SHADOW_PTR_INT]], 32
+; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
+; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
+; CHECK-NEXT: [[TMP21:%.*]] = icmp ne ptr [[TMP20]], null
+; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP17]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 40
+; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
+; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
+; CHECK-NEXT: [[TMP26:%.*]] = icmp ne ptr [[TMP25]], null
+; CHECK-NEXT: [[TMP27:%.*]] = or i1 [[TMP22]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[SHADOW_PTR_INT]], 48
+; CHECK-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr
+; CHECK-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8
+; CHECK-NEXT: [[TMP31:%.*]] = icmp ne ptr [[TMP30]], null
+; CHECK-NEXT: [[TMP32:%.*]] = or i1 [[TMP27]], [[TMP31]]
+; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[SHADOW_PTR_INT]], 56
+; CHECK-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr
+; CHECK-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP34]], align 8
+; CHECK-NEXT: [[TMP36:%.*]] = icmp ne ptr [[TMP35]], null
+; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP32]], [[TMP36]]
+; CHECK-NEXT: br i1 [[TMP37]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF0]]
+; CHECK: 38:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP39]]
+; CHECK: 39:
+; CHECK-NEXT: store ptr null, ptr [[SHADOW_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 16
+; CHECK-NEXT: [[SHADOW_BYTE_2_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
+; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_4_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 32
+; CHECK-NEXT: [[SHADOW_BYTE_4_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_4_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -4 to ptr), ptr [[SHADOW_BYTE_4_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_5_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 40
+; CHECK-NEXT: [[SHADOW_BYTE_5_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_5_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -5 to ptr), ptr [[SHADOW_BYTE_5_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_6_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 48
+; CHECK-NEXT: [[SHADOW_BYTE_6_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_6_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -6 to ptr), ptr [[SHADOW_BYTE_6_PTR]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 56
+; CHECK-NEXT: [[SHADOW_BYTE_7_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR]], align 8
+; CHECK-NEXT: br label [[TMP41:%.*]]
+; CHECK: 40:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP41]]
+; CHECK: 41:
+; CHECK-NEXT: br label [[TMP87:%.*]]
+; CHECK: 42:
+; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[SHADOW_PTR_INT]], 8
+; CHECK-NEXT: [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to ptr
+; CHECK-NEXT: [[TMP45:%.*]] = load ptr, ptr [[TMP44]], align 8
+; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[TMP45]] to i64
+; CHECK-NEXT: [[TMP47:%.*]] = icmp sge i64 [[TMP46]], 0
+; CHECK-NEXT: [[TMP48:%.*]] = or i1 false, [[TMP47]]
+; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[SHADOW_PTR_INT]], 16
+; CHECK-NEXT: [[TMP50:%.*]] = inttoptr i64 [[TMP49]] to ptr
+; CHECK-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
+; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr [[TMP51]] to i64
+; CHECK-NEXT: [[TMP53:%.*]] = icmp sge i64 [[TMP52]], 0
+; CHECK-NEXT: [[TMP54:%.*]] = or i1 [[TMP48]], [[TMP53]]
+; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[SHADOW_PTR_INT]], 24
+; CHECK-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr
+; CHECK-NEXT: [[TMP57:%.*]] = load ptr, ptr [[TMP56]], align 8
+; CHECK-NEXT: [[TMP58:%.*]] = ptrtoint ptr [[TMP57]] to i64
+; CHECK-NEXT: [[TMP59:%.*]] = icmp sge i64 [[TMP58]], 0
+; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[TMP54]], [[TMP59]]
+; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[SHADOW_PTR_INT]], 32
+; CHECK-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr
+; CHECK-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP62]], align 8
+; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr [[TMP63]] to i64
+; CHECK-NEXT: [[TMP65:%.*]] = icmp sge i64 [[TMP64]], 0
+; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[TMP60]], [[TMP65]]
+; CHECK-NEXT: [[TMP67:%.*]] = add i64 [[SHADOW_PTR_INT]], 40
+; CHECK-NEXT: [[TMP68:%.*]] = inttoptr i64 [[TMP67]] to ptr
+; CHECK-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8
+; CHECK-NEXT: [[TMP70:%.*]] = ptrtoint ptr [[TMP69]] to i64
+; CHECK-NEXT: [[TMP71:%.*]] = icmp sge i64 [[TMP70]], 0
+; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[TMP66]], [[TMP71]]
+; CHECK-NEXT: [[TMP73:%.*]] = add i64 [[SHADOW_PTR_INT]], 48
+; CHECK-NEXT: [[TMP74:%.*]] = inttoptr i64 [[TMP73]] to ptr
+; CHECK-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8
+; CHECK-NEXT: [[TMP76:%.*]] = ptrtoint ptr [[TMP75]] to i64
+; CHECK-NEXT: [[TMP77:%.*]] = icmp sge i64 [[TMP76]], 0
+; CHECK-NEXT: [[TMP78:%.*]] = or i1 [[TMP72]], [[TMP77]]
+; CHECK-NEXT: [[TMP79:%.*]] = add i64 [[SHADOW_PTR_INT]], 56
+; CHECK-NEXT: [[TMP80:%.*]] = inttoptr i64 [[TMP79]] to ptr
+; CHECK-NEXT: [[TMP81:%.*]] = load ptr, ptr [[TMP80]], align 8
+; CHECK-NEXT: [[TMP82:%.*]] = ptrtoint ptr [[TMP81]] to i64
+; CHECK-NEXT: [[TMP83:%.*]] = icmp sge i64 [[TMP82]], 0
+; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[TMP78]], [[TMP83]]
+; CHECK-NEXT: br i1 [[TMP84]], label [[TMP85:%.*]], label [[TMP86:%.*]], !prof [[PROF0]]
+; CHECK: 85:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_app_memory_mask, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP86]]
+; CHECK: 86:
+; CHECK-NEXT: br label [[TMP87]]
+; CHECK: 87:
+; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: [[APP_PTR_MASKED3:%.*]] = and i64 ptrtoint (ptr @__tysan_shadow_memory_address to i64), [[APP_MEM_MASK2]]
+; CHECK-NEXT: [[APP_PTR_SHIFTED4:%.*]] = shl i64 [[APP_PTR_MASKED3]], 3
+; CHECK-NEXT: [[SHADOW_PTR_INT5:%.*]] = add i64 [[APP_PTR_SHIFTED4]], [[SHADOW_BASE1]]
+; CHECK-NEXT: [[SHADOW_PTR6:%.*]] = inttoptr i64 [[SHADOW_PTR_INT5]] to ptr
+; CHECK-NEXT: [[SHADOW_DESC7:%.*]] = load ptr, ptr [[SHADOW_PTR6]], align 8
+; CHECK-NEXT: [[BAD_DESC8:%.*]] = icmp ne ptr [[SHADOW_DESC7]], null
+; CHECK-NEXT: br i1 [[BAD_DESC8]], label [[TMP88:%.*]], label [[TMP130:%.*]], !prof [[PROF0]]
+; CHECK: 88:
+; CHECK-NEXT: [[TMP89:%.*]] = icmp eq ptr [[SHADOW_DESC7]], null
+; CHECK-NEXT: br i1 [[TMP89]], label [[TMP90:%.*]], label [[TMP128:%.*]]
+; CHECK: 90:
+; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8
+; CHECK-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP91]] to ptr
+; CHECK-NEXT: [[TMP93:%.*]] = load ptr, ptr [[TMP92]], align 8
+; CHECK-NEXT: [[TMP94:%.*]] = icmp ne ptr [[TMP93]], null
+; CHECK-NEXT: [[TMP95:%.*]] = or i1 false, [[TMP94]]
+; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16
+; CHECK-NEXT: [[TMP97:%.*]] = inttoptr i64 [[TMP96]] to ptr
+; CHECK-NEXT: [[TMP98:%.*]] = load ptr, ptr [[TMP97]], align 8
+; CHECK-NEXT: [[TMP99:%.*]] = icmp ne ptr [[TMP98]], null
+; CHECK-NEXT: [[TMP100:%.*]] = or i1 [[TMP95]], [[TMP99]]
+; CHECK-NEXT: [[TMP101:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24
+; CHECK-NEXT: [[TMP102:%.*]] = inttoptr i64 [[TMP101]] to ptr
+; CHECK-NEXT: [[TMP103:%.*]] = load ptr, ptr [[TMP102]], align 8
+; CHECK-NEXT: [[TMP104:%.*]] = icmp ne ptr [[TMP103]], null
+; CHECK-NEXT: [[TMP105:%.*]] = or i1 [[TMP100]], [[TMP104]]
+; CHECK-NEXT: [[TMP106:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32
+; CHECK-NEXT: [[TMP107:%.*]] = inttoptr i64 [[TMP106]] to ptr
+; CHECK-NEXT: [[TMP108:%.*]] = load ptr, ptr [[TMP107]], align 8
+; CHECK-NEXT: [[TMP109:%.*]] = icmp ne ptr [[TMP108]], null
+; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[TMP105]], [[TMP109]]
+; CHECK-NEXT: [[TMP111:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40
+; CHECK-NEXT: [[TMP112:%.*]] = inttoptr i64 [[TMP111]] to ptr
+; CHECK-NEXT: [[TMP113:%.*]] = load ptr, ptr [[TMP112]], align 8
+; CHECK-NEXT: [[TMP114:%.*]] = icmp ne ptr [[TMP113]], null
+; CHECK-NEXT: [[TMP115:%.*]] = or i1 [[TMP110]], [[TMP114]]
+; CHECK-NEXT: [[TMP116:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48
+; CHECK-NEXT: [[TMP117:%.*]] = inttoptr i64 [[TMP116]] to ptr
+; CHECK-NEXT: [[TMP118:%.*]] = load ptr, ptr [[TMP117]], align 8
+; CHECK-NEXT: [[TMP119:%.*]] = icmp ne ptr [[TMP118]], null
+; CHECK-NEXT: [[TMP120:%.*]] = or i1 [[TMP115]], [[TMP119]]
+; CHECK-NEXT: [[TMP121:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56
+; CHECK-NEXT: [[TMP122:%.*]] = inttoptr i64 [[TMP121]] to ptr
+; CHECK-NEXT: [[TMP123:%.*]] = load ptr, ptr [[TMP122]], align 8
+; CHECK-NEXT: [[TMP124:%.*]] = icmp ne ptr [[TMP123]], null
+; CHECK-NEXT: [[TMP125:%.*]] = or i1 [[TMP120]], [[TMP124]]
+; CHECK-NEXT: br i1 [[TMP125]], label [[TMP126:%.*]], label [[TMP127:%.*]], !prof [[PROF0]]
+; CHECK: 126:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP127]]
+; CHECK: 127:
+; CHECK-NEXT: store ptr null, ptr [[SHADOW_PTR6]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET9:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_PTR10:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET9]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR10]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET11:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16
+; CHECK-NEXT: [[SHADOW_BYTE_2_PTR12:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET11]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR12]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET13:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24
+; CHECK-NEXT: [[SHADOW_BYTE_3_PTR14:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET13]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR14]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_4_OFFSET15:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32
+; CHECK-NEXT: [[SHADOW_BYTE_4_PTR16:%.*]] = inttoptr i64 [[SHADOW_BYTE_4_OFFSET15]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -4 to ptr), ptr [[SHADOW_BYTE_4_PTR16]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_5_OFFSET17:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40
+; CHECK-NEXT: [[SHADOW_BYTE_5_PTR18:%.*]] = inttoptr i64 [[SHADOW_BYTE_5_OFFSET17]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -5 to ptr), ptr [[SHADOW_BYTE_5_PTR18]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_6_OFFSET19:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48
+; CHECK-NEXT: [[SHADOW_BYTE_6_PTR20:%.*]] = inttoptr i64 [[SHADOW_BYTE_6_OFFSET19]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -6 to ptr), ptr [[SHADOW_BYTE_6_PTR20]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET21:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56
+; CHECK-NEXT: [[SHADOW_BYTE_7_PTR22:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET21]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR22]], align 8
+; CHECK-NEXT: br label [[TMP129:%.*]]
+; CHECK: 128:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP129]]
+; CHECK: 129:
+; CHECK-NEXT: br label [[TMP175:%.*]]
+; CHECK: 130:
+; CHECK-NEXT: [[TMP131:%.*]] = add i64 [[SHADOW_PTR_INT5]], 8
+; CHECK-NEXT: [[TMP132:%.*]] = inttoptr i64 [[TMP131]] to ptr
+; CHECK-NEXT: [[TMP133:%.*]] = load ptr, ptr [[TMP132]], align 8
+; CHECK-NEXT: [[TMP134:%.*]] = ptrtoint ptr [[TMP133]] to i64
+; CHECK-NEXT: [[TMP135:%.*]] = icmp sge i64 [[TMP134]], 0
+; CHECK-NEXT: [[TMP136:%.*]] = or i1 false, [[TMP135]]
+; CHECK-NEXT: [[TMP137:%.*]] = add i64 [[SHADOW_PTR_INT5]], 16
+; CHECK-NEXT: [[TMP138:%.*]] = inttoptr i64 [[TMP137]] to ptr
+; CHECK-NEXT: [[TMP139:%.*]] = load ptr, ptr [[TMP138]], align 8
+; CHECK-NEXT: [[TMP140:%.*]] = ptrtoint ptr [[TMP139]] to i64
+; CHECK-NEXT: [[TMP141:%.*]] = icmp sge i64 [[TMP140]], 0
+; CHECK-NEXT: [[TMP142:%.*]] = or i1 [[TMP136]], [[TMP141]]
+; CHECK-NEXT: [[TMP143:%.*]] = add i64 [[SHADOW_PTR_INT5]], 24
+; CHECK-NEXT: [[TMP144:%.*]] = inttoptr i64 [[TMP143]] to ptr
+; CHECK-NEXT: [[TMP145:%.*]] = load ptr, ptr [[TMP144]], align 8
+; CHECK-NEXT: [[TMP146:%.*]] = ptrtoint ptr [[TMP145]] to i64
+; CHECK-NEXT: [[TMP147:%.*]] = icmp sge i64 [[TMP146]], 0
+; CHECK-NEXT: [[TMP148:%.*]] = or i1 [[TMP142]], [[TMP147]]
+; CHECK-NEXT: [[TMP149:%.*]] = add i64 [[SHADOW_PTR_INT5]], 32
+; CHECK-NEXT: [[TMP150:%.*]] = inttoptr i64 [[TMP149]] to ptr
+; CHECK-NEXT: [[TMP151:%.*]] = load ptr, ptr [[TMP150]], align 8
+; CHECK-NEXT: [[TMP152:%.*]] = ptrtoint ptr [[TMP151]] to i64
+; CHECK-NEXT: [[TMP153:%.*]] = icmp sge i64 [[TMP152]], 0
+; CHECK-NEXT: [[TMP154:%.*]] = or i1 [[TMP148]], [[TMP153]]
+; CHECK-NEXT: [[TMP155:%.*]] = add i64 [[SHADOW_PTR_INT5]], 40
+; CHECK-NEXT: [[TMP156:%.*]] = inttoptr i64 [[TMP155]] to ptr
+; CHECK-NEXT: [[TMP157:%.*]] = load ptr, ptr [[TMP156]], align 8
+; CHECK-NEXT: [[TMP158:%.*]] = ptrtoint ptr [[TMP157]] to i64
+; CHECK-NEXT: [[TMP159:%.*]] = icmp sge i64 [[TMP158]], 0
+; CHECK-NEXT: [[TMP160:%.*]] = or i1 [[TMP154]], [[TMP159]]
+; CHECK-NEXT: [[TMP161:%.*]] = add i64 [[SHADOW_PTR_INT5]], 48
+; CHECK-NEXT: [[TMP162:%.*]] = inttoptr i64 [[TMP161]] to ptr
+; CHECK-NEXT: [[TMP163:%.*]] = load ptr, ptr [[TMP162]], align 8
+; CHECK-NEXT: [[TMP164:%.*]] = ptrtoint ptr [[TMP163]] to i64
+; CHECK-NEXT: [[TMP165:%.*]] = icmp sge i64 [[TMP164]], 0
+; CHECK-NEXT: [[TMP166:%.*]] = or i1 [[TMP160]], [[TMP165]]
+; CHECK-NEXT: [[TMP167:%.*]] = add i64 [[SHADOW_PTR_INT5]], 56
+; CHECK-NEXT: [[TMP168:%.*]] = inttoptr i64 [[TMP167]] to ptr
+; CHECK-NEXT: [[TMP169:%.*]] = load ptr, ptr [[TMP168]], align 8
+; CHECK-NEXT: [[TMP170:%.*]] = ptrtoint ptr [[TMP169]] to i64
+; CHECK-NEXT: [[TMP171:%.*]] = icmp sge i64 [[TMP170]], 0
+; CHECK-NEXT: [[TMP172:%.*]] = or i1 [[TMP166]], [[TMP171]]
+; CHECK-NEXT: br i1 [[TMP172]], label [[TMP173:%.*]], label [[TMP174:%.*]], !prof [[PROF0]]
+; CHECK: 173:
+; CHECK-NEXT: call void @__tysan_check(ptr @__tysan_shadow_memory_address, i32 8, ptr null, i32 1)
+; CHECK-NEXT: br label [[TMP174]]
+; CHECK: 174:
+; CHECK-NEXT: br label [[TMP175]]
+; CHECK: 175:
+; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr [[A:%.*]], ptr @__tysan_v1___ZTS1v_o_12, i1 true, i64 4, i32 2)
+; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[APP_PTR_MASKED23:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK2]]
+; CHECK-NEXT: [[APP_PTR_SHIFTED24:%.*]] = shl i64 [[APP_PTR_MASKED23]], 3
+; CHECK-NEXT: [[SHADOW_PTR_INT25:%.*]] = add i64 [[APP_PTR_SHIFTED24]], [[SHADOW_BASE1]]
+; CHECK-NEXT: [[SHADOW_PTR26:%.*]] = inttoptr i64 [[SHADOW_PTR_INT25]] to ptr
+; CHECK-NEXT: [[SHADOW_DESC27:%.*]] = load ptr, ptr [[SHADOW_PTR26]], align 8
+; CHECK-NEXT: [[BAD_DESC28:%.*]] = icmp ne ptr [[SHADOW_DESC27]], @__tysan_v1___ZTS1v_o_12
+; CHECK-NEXT: br i1 [[BAD_DESC28]], label [[TMP176:%.*]], label [[TMP198:%.*]], !prof [[PROF0]]
+; CHECK: 176:
+; CHECK-NEXT: [[TMP177:%.*]] = icmp eq ptr [[SHADOW_DESC27]], null
+; CHECK-NEXT: br i1 [[TMP177]], label [[TMP178:%.*]], label [[TMP196:%.*]]
+; CHECK: 178:
+; CHECK-NEXT: [[TMP179:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8
+; CHECK-NEXT: [[TMP180:%.*]] = inttoptr i64 [[TMP179]] to ptr
+; CHECK-NEXT: [[TMP181:%.*]] = load ptr, ptr [[TMP180]], align 8
+; CHECK-NEXT: [[TMP182:%.*]] = icmp ne ptr [[TMP181]], null
+; CHECK-NEXT: [[TMP183:%.*]] = or i1 false, [[TMP182]]
+; CHECK-NEXT: [[TMP184:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16
+; CHECK-NEXT: [[TMP185:%.*]] = inttoptr i64 [[TMP184]] to ptr
+; CHECK-NEXT: [[TMP186:%.*]] = load ptr, ptr [[TMP185]], align 8
+; CHECK-NEXT: [[TMP187:%.*]] = icmp ne ptr [[TMP186]], null
+; CHECK-NEXT: [[TMP188:%.*]] = or i1 [[TMP183]], [[TMP187]]
+; CHECK-NEXT: [[TMP189:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24
+; CHECK-NEXT: [[TMP190:%.*]] = inttoptr i64 [[TMP189]] to ptr
+; CHECK-NEXT: [[TMP191:%.*]] = load ptr, ptr [[TMP190]], align 8
+; CHECK-NEXT: [[TMP192:%.*]] = icmp ne ptr [[TMP191]], null
+; CHECK-NEXT: [[TMP193:%.*]] = or i1 [[TMP188]], [[TMP192]]
+; CHECK-NEXT: br i1 [[TMP193]], label [[TMP194:%.*]], label [[TMP195:%.*]], !prof [[PROF0]]
+; CHECK: 194:
+; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2)
+; CHECK-NEXT: br label [[TMP195]]
+; CHECK: 195:
+; CHECK-NEXT: store ptr @__tysan_v1___ZTS1v_o_12, ptr [[SHADOW_PTR26]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET29:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8
+; CHECK-NEXT: [[SHADOW_BYTE_1_PTR30:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET29]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -1 to ptr), ptr [[SHADOW_BYTE_1_PTR30]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_2_OFFSET31:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16
+; CHECK-NEXT: [[SHADOW_BYTE_2_PTR32:%.*]] = inttoptr i64 [[SHADOW_BYTE_2_OFFSET31]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -2 to ptr), ptr [[SHADOW_BYTE_2_PTR32]], align 8
+; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET33:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24
+; CHECK-NEXT: [[SHADOW_BYTE_3_PTR34:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET33]] to ptr
+; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR34]], align 8
+; CHECK-NEXT: br label [[TMP197:%.*]]
+; CHECK: 196:
+; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2)
+; CHECK-NEXT: br label [[TMP197]]
+; CHECK: 197:
+; CHECK-NEXT: br label [[TMP219:%.*]]
+; CHECK: 198:
+; CHECK-NEXT: [[TMP199:%.*]] = add i64 [[SHADOW_PTR_INT25]], 8
+; CHECK-NEXT: [[TMP200:%.*]] = inttoptr i64 [[TMP199]] to ptr
+; CHECK-NEXT: [[TMP201:%.*]] = load ptr, ptr [[TMP200]], align 8
+; CHECK-NEXT: [[TMP202:%.*]] = ptrtoint ptr [[TMP201]] to i64
+; CHECK-NEXT: [[TMP203:%.*]] = icmp sge i64 [[TMP202]], 0
+; CHECK-NEXT: [[TMP204:%.*]] = or i1 false, [[TMP203]]
+; CHECK-NEXT: [[TMP205:%.*]] = add i64 [[SHADOW_PTR_INT25]], 16
+; CHECK-NEXT: [[TMP206:%.*]] = inttoptr i64 [[TMP205]] to ptr
+; CHECK-NEXT: [[TMP207:%.*]] = load ptr, ptr [[TMP206]], align 8
+; CHECK-NEXT: [[TMP208:%.*]] = ptrtoint ptr [[TMP207]] to i64
+; CHECK-NEXT: [[TMP209:%.*]] = icmp sge i64 [[TMP208]], 0
+; CHECK-NEXT: [[TMP210:%.*]] = or i1 [[TMP204]], [[TMP209]]
+; CHECK-NEXT: [[TMP211:%.*]] = add i64 [[SHADOW_PTR_INT25]], 24
+; CHECK-NEXT: [[TMP212:%.*]] = inttoptr i64 [[TMP211]] to ptr
+; CHECK-NEXT: [[TMP213:%.*]] = load ptr, ptr [[TMP212]], align 8
+; CHECK-NEXT: [[TMP214:%.*]] = ptrtoint ptr [[TMP213]] to i64
+; CHECK-NEXT: [[TMP215:%.*]] = icmp sge i64 [[TMP214]], 0
+; CHECK-NEXT: [[TMP216:%.*]] = or i1 [[TMP210]], [[TMP215]]
+; CHECK-NEXT: br i1 [[TMP216]], label [[TMP217:%.*]], label [[TMP218:%.*]], !prof [[PROF0]]
+; CHECK: 217:
+; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2)
+; CHECK-NEXT: br label [[TMP218]]
+; CHECK: 218:
+; CHECK-NEXT: br label [[TMP219]]
+; CHECK: 219:
+; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i32 42, ptr %a, align 4, !tbaa !6
+ ret void
+}
+
+!0 = !{!"Simple C++ TBAA"}
+!1 = !{!"omnipotent char", !0, i64 0}
+!2 = !{!"int", !1, i64 0}
+!3 = !{!2, !2, i64 0}
+!4 = !{!"_ZTS1x", !2, i64 0, !2, i64 4}
+!5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16}
+!6 = !{!5, !2, i64 12}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000}
+; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META2]], i64 12}
+; CHECK: [[META6]] = !{!"_ZTS1v", [[META2]], i64 8, [[META2]], i64 12, [[META7:![0-9]+]], i64 16}
+; CHECK: [[META7]] = !{!"_ZTS1x", [[META2]], i64 0, [[META2]], i64 4}
+;.
diff --git a/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll
new file mode 100644
index 0000000..0bd7940
--- /dev/null
+++ b/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+; RUN: opt -passes='tysan' -tysan-outline-instrumentation -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@global1 = global i32 0, align 4
+@global2 = global i32 0, align 4
+
+
+; CHECK-LABEL: define internal void @__tysan_set_globals_types(
+; CHECK-NEXT: %app.mem.mask = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: %shadow.base = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: call void @__tysan_set_shadow_type(ptr @global1, ptr @__tysan_v1_int, i64 4)
+; CHECK-NEXT: call void @__tysan_set_shadow_type(ptr @global1, ptr @__tysan_v1_int, i64 4)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+!llvm.tysan.globals = !{!13, !14}
+
+!0 = !{!"Simple C++ TBAA"}
+!1 = !{!"omnipotent char", !0, i64 0}
+!2 = !{!"int", !1, i64 0}
+!13 = !{ptr @global1, !2}
+!14 = !{ptr @global1, !2}
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s
index fec8ba1..0a480a7 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s
@@ -2,33 +2,33 @@
; RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
tensor_load_to_lds s[0:3], s[4:11]
-// GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c]
+// GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS
-// GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c]
+// GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19]
-// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10]
+// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV
-// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10]
+// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
tensor_store_from_lds s[0:3], s[4:11]
-// GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c]
+// GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS
-// GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c]
+// GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19]
-// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10]
+// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV
-// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10]
+// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
index d85ea79..399a644 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding -comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | sed -n 's#.*\(\[0x[0-9a-fx,]\{1,\}\]\)#\1#p' | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding -comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | sed -n 's#.*\(\[0x[0-9a-fx,]\{1,\}\]\)#\1#p' | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
v_bfrev_b32_e32 v5, v1
// GFX12: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt
index 9afaa07..8005793 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt
@@ -1,25 +1,25 @@
# RUN: llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
-# GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c]
-0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c
+# GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c]
+0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c
-# GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c]
-0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c
+# GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c]
+0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c
-# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10]
-0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10
+# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10]
+0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10
-# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10]
-0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10
+# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10]
+0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10
-# GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c]
-0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c
+# GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c]
+0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c
-# GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c]
-0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c
+# GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c]
+0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c
-# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10]
-0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10
+# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10]
+0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10
-# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10]
-0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10
+# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10]
+0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10
diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt
index 57e3153..5c2927a 100755
--- a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt
@@ -1,70 +1,6 @@
# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s -check-prefix=ATT
# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s -check-prefix=INTEL
-# ATT: t2rpntlvwz0rs 268435456(%rbp,%r14,8), %tmm6
-# INTEL: t2rpntlvwz0rs tmm6, [rbp + 8*r14 + 268435456]
-0xc4,0xa5,0x78,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz0rs 291(%r8,%rax,4), %tmm2
-# INTEL: t2rpntlvwz0rs tmm2, [r8 + 4*rax + 291]
-0xc4,0xc5,0x78,0xf8,0x94,0x80,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz0rs 64(%rbx), %tmm6
-# INTEL: t2rpntlvwz0rs tmm6, [rbx + 64]
-0xc4,0xe5,0x78,0xf8,0x74,0x23,0x40
-
-# ATT: t2rpntlvwz0rs -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz0rs tmm2, [2*rbp - 32]
-0xc4,0xe5,0x78,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz0rst1 268435456(%rbp,%r14,8), %tmm6
-# INTEL: t2rpntlvwz0rst1 tmm6, [rbp + 8*r14 + 268435456]
-0xc4,0xa5,0x78,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz0rst1 291(%r8,%rax,4), %tmm2
-# INTEL: t2rpntlvwz0rst1 tmm2, [r8 + 4*rax + 291]
-0xc4,0xc5,0x78,0xf9,0x94,0x80,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz0rst1 64(%rbx), %tmm6
-# INTEL: t2rpntlvwz0rst1 tmm6, [rbx + 64]
-0xc4,0xe5,0x78,0xf9,0x74,0x23,0x40
-
-# ATT: t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
-0xc4,0xe5,0x78,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz1rs 268435456(%rbp,%r14,8), %tmm6
-# INTEL: t2rpntlvwz1rs tmm6, [rbp + 8*r14 + 268435456]
-0xc4,0xa5,0x79,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz1rs 291(%r8,%rax,4), %tmm2
-# INTEL: t2rpntlvwz1rs tmm2, [r8 + 4*rax + 291]
-0xc4,0xc5,0x79,0xf8,0x94,0x80,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz1rs 64(%rbx), %tmm6
-# INTEL: t2rpntlvwz1rs tmm6, [rbx + 64]
-0xc4,0xe5,0x79,0xf8,0x74,0x23,0x40
-
-# ATT: t2rpntlvwz1rs -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz1rs tmm2, [2*rbp - 32]
-0xc4,0xe5,0x79,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz1rst1 268435456(%rbp,%r14,8), %tmm6
-# INTEL: t2rpntlvwz1rst1 tmm6, [rbp + 8*r14 + 268435456]
-0xc4,0xa5,0x79,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz1rst1 291(%r8,%rax,4), %tmm2
-# INTEL: t2rpntlvwz1rst1 tmm2, [r8 + 4*rax + 291]
-0xc4,0xc5,0x79,0xf9,0x94,0x80,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz1rst1 64(%rbx), %tmm6
-# INTEL: t2rpntlvwz1rst1 tmm6, [rbx + 64]
-0xc4,0xe5,0x79,0xf9,0x74,0x23,0x40
-
-# ATT: t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
-0xc4,0xe5,0x79,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff
-
# ATT: tileloaddrs 268435456(%rbp,%r14,8), %tmm6
# INTEL: tileloaddrs tmm6, [rbp + 8*r14 + 268435456]
0xc4,0xa2,0x7b,0x4a,0xb4,0xf5,0x00,0x00,0x00,0x10
@@ -97,70 +33,6 @@
# INTEL: tileloaddrst1 tmm3, [2*rbp - 32]
0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff
-# ATT: t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6
-# INTEL: t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456]
-0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2
-# INTEL: t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291]
-0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz0rs 64(%r18), %tmm6
-# INTEL: t2rpntlvwz0rs tmm6, [r18 + 64]
-0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40
-
-# ATT: t2rpntlvwz0rs -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz0rs tmm2, [2*rbp - 32]
-0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6
-# INTEL: t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456]
-0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2
-# INTEL: t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291]
-0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz0rst1 64(%r18), %tmm6
-# INTEL: t2rpntlvwz0rst1 tmm6, [r18 + 64]
-0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40
-
-# ATT: t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
-0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6
-# INTEL: t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456]
-0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2
-# INTEL: t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291]
-0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz1rs 64(%r18), %tmm6
-# INTEL: t2rpntlvwz1rs tmm6, [r18 + 64]
-0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40
-
-# ATT: t2rpntlvwz1rs -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz1rs tmm2, [2*rbp - 32]
-0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6
-# INTEL: t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456]
-0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2
-# INTEL: t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291]
-0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz1rst1 64(%r18), %tmm6
-# INTEL: t2rpntlvwz1rst1 tmm6, [r18 + 64]
-0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40
-
-# ATT: t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
-0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff
-
# ATT: tileloaddrs 268435456(%r16,%r14,8), %tmm6
# INTEL: tileloaddrs tmm6, [r16 + 8*r14 + 268435456]
0x62,0xba,0x7f,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10
diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt
index f372c42..347e61c 100644
--- a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt
@@ -9,11 +9,3 @@
# INTEL: tmmultf32ps tmm3, tmm2, tmm1
0xc4,0xe2,0x71,0x48,0xda
-# ATT: ttmmultf32ps %tmm4, %tmm5, %tmm6
-# INTEL: ttmmultf32ps tmm6, tmm5, tmm4
-0xc4,0xe2,0x58,0x48,0xf5
-
-# ATT: ttmmultf32ps %tmm1, %tmm2, %tmm3
-# INTEL: ttmmultf32ps tmm3, tmm2, tmm1
-0xc4,0xe2,0x70,0x48,0xda
-
diff --git a/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt b/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt
deleted file mode 100644
index d768630..0000000
--- a/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
-# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
-
-# ATT: t2rpntlvwz0 268435456(%rbp,%r14,8), %tmm4
-# INTEL: t2rpntlvwz0 tmm4, [rbp + 8*r14 + 268435456]
-0xc4,0xa2,0x78,0x6e,0xa4,0xf5,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz0 291(%r8,%rax,4), %tmm2
-# INTEL: t2rpntlvwz0 tmm2, [r8 + 4*rax + 291]
-0xc4,0xc2,0x78,0x6e,0x94,0x80,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz0 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz0 tmm2, [2*rbp - 32]
-0xc4,0xe2,0x78,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz0t1 268435456(%rbp,%r14,8), %tmm4
-# INTEL: t2rpntlvwz0t1 tmm4, [rbp + 8*r14 + 268435456]
-0xc4,0xa2,0x78,0x6f,0xa4,0xf5,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz0t1 291(%r8,%rax,4), %tmm2
-# INTEL: t2rpntlvwz0t1 tmm2, [r8 + 4*rax + 291]
-0xc4,0xc2,0x78,0x6f,0x94,0x80,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz0t1 tmm2, [2*rbp - 32]
-0xc4,0xe2,0x78,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz1 268435456(%rbp,%r14,8), %tmm4
-# INTEL: t2rpntlvwz1 tmm4, [rbp + 8*r14 + 268435456]
-0xc4,0xa2,0x79,0x6e,0xa4,0xf5,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz1 291(%r8,%rax,4), %tmm2
-# INTEL: t2rpntlvwz1 tmm2, [r8 + 4*rax + 291]
-0xc4,0xc2,0x79,0x6e,0x94,0x80,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz1 tmm2, [2*rbp - 32]
-0xc4,0xe2,0x79,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz1t1 268435456(%rbp,%r14,8), %tmm4
-# INTEL: t2rpntlvwz1t1 tmm4, [rbp + 8*r14 + 268435456]
-0xc4,0xa2,0x79,0x6f,0xa4,0xf5,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz1t1 291(%r8,%rax,4), %tmm2
-# INTEL: t2rpntlvwz1t1 tmm2, [r8 + 4*rax + 291]
-0xc4,0xc2,0x79,0x6f,0x94,0x80,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz1t1 tmm2, [2*rbp - 32]
-0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4
-# INTEL: t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456]
-0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz0 291(%r8,%r17,4), %tmm2
-# INTEL: t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291]
-0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz0 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz0 tmm2, [2*rbp - 32]
-0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4
-# INTEL: t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456]
-0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2
-# INTEL: t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291]
-0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz0t1 tmm2, [2*rbp - 32]
-0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4
-# INTEL: t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456]
-0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz1 291(%r8,%r17,4), %tmm2
-# INTEL: t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291]
-0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz1 tmm2, [2*rbp - 32]
-0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4
-# INTEL: t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456]
-0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10
-
-# ATT: t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2
-# INTEL: t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291]
-0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00
-
-# ATT: t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
-# INTEL: t2rpntlvwz1t1 tmm2, [2*rbp - 32]
-0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff
-
-# ATT: ttransposed %tmm1, %tmm2
-# INTEL: ttransposed tmm2, tmm1
-0xc4,0xe2,0x7a,0x5f,0xd1
-
-# ATT: ttransposed %tmm2, %tmm3
-# INTEL: ttransposed tmm3, tmm2
-0xc4,0xe2,0x7a,0x5f,0xda
-
-# ATT: ttdpbf16ps %tmm7, %tmm6, %tmm5
-# INTEL: ttdpbf16ps tmm5, tmm6, tmm7
-0xc4,0xe2,0x42,0x6c,0xee
-
-# ATT: ttdpbf16ps %tmm1, %tmm2, %tmm3
-# INTEL: ttdpbf16ps tmm3, tmm2, tmm1
-0xc4,0xe2,0x72,0x6c,0xda
-
-# ATT: ttdpfp16ps %tmm7, %tmm6, %tmm5
-# INTEL: ttdpfp16ps tmm5, tmm6, tmm7
-0xc4,0xe2,0x43,0x6c,0xee
-
-# ATT: ttdpfp16ps %tmm1, %tmm2, %tmm3
-# INTEL: ttdpfp16ps tmm3, tmm2, tmm1
-0xc4,0xe2,0x73,0x6c,0xda
-
-# ATT: ttcmmimfp16ps %tmm4, %tmm5, %tmm6
-# INTEL: ttcmmimfp16ps tmm6, tmm5, tmm4
-0xc4,0xe2,0x5b,0x6b,0xf5
-
-# ATT: ttcmmimfp16ps %tmm1, %tmm2, %tmm3
-# INTEL: ttcmmimfp16ps tmm3, tmm2, tmm1
-0xc4,0xe2,0x73,0x6b,0xda
-
-# ATT: ttcmmrlfp16ps %tmm4, %tmm5, %tmm6
-# INTEL: ttcmmrlfp16ps tmm6, tmm5, tmm4
-0xc4,0xe2,0x5a,0x6b,0xf5
-
-# ATT: ttcmmrlfp16ps %tmm1, %tmm2, %tmm3
-# INTEL: ttcmmrlfp16ps tmm3, tmm2, tmm1
-0xc4,0xe2,0x72,0x6b,0xda
-
-# ATT: tconjtcmmimfp16ps %tmm4, %tmm5, %tmm6
-# INTEL: tconjtcmmimfp16ps tmm6, tmm5, tmm4
-0xc4,0xe2,0x58,0x6b,0xf5
-
-# ATT: tconjtcmmimfp16ps %tmm1, %tmm2, %tmm3
-# INTEL: tconjtcmmimfp16ps tmm3, tmm2, tmm1
-0xc4,0xe2,0x70,0x6b,0xda
-
-# ATT: tconjtfp16 %tmm5, %tmm6
-# INTEL: tconjtfp16 tmm6, tmm5
-0xc4,0xe2,0x79,0x6b,0xf5
-
-# ATT: tconjtfp16 %tmm2, %tmm3
-# INTEL: tconjtfp16 tmm3, tmm2
-0xc4,0xe2,0x79,0x6b,0xda
diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s
index 92db672..497a1c6 100755
--- a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s
@@ -1,69 +1,5 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
-// CHECK: t2rpntlvwz0rs 268435456(%rbp,%r14,8), %tmm6
-// CHECK: encoding: [0xc4,0xa5,0x78,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz0rs 268435456(%rbp,%r14,8), %tmm6
-
-// CHECK: t2rpntlvwz0rs 291(%r8,%rax,4), %tmm2
-// CHECK: encoding: [0xc4,0xc5,0x78,0xf8,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz0rs 291(%r8,%rax,4), %tmm2
-
-// CHECK: t2rpntlvwz0rs 64(%rbx), %tmm6
-// CHECK: encoding: [0xc4,0xe5,0x78,0xf8,0x74,0x23,0x40]
- t2rpntlvwz0rs 64(%rbx), %tmm6
-
-// CHECK: t2rpntlvwz0rs -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0xc4,0xe5,0x78,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz0rs -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz0rst1 268435456(%rbp,%r14,8), %tmm6
-// CHECK: encoding: [0xc4,0xa5,0x78,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz0rst1 268435456(%rbp,%r14,8), %tmm6
-
-// CHECK: t2rpntlvwz0rst1 291(%r8,%rax,4), %tmm2
-// CHECK: encoding: [0xc4,0xc5,0x78,0xf9,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz0rst1 291(%r8,%rax,4), %tmm2
-
-// CHECK: t2rpntlvwz0rst1 64(%rbx), %tmm6
-// CHECK: encoding: [0xc4,0xe5,0x78,0xf9,0x74,0x23,0x40]
- t2rpntlvwz0rst1 64(%rbx), %tmm6
-
-// CHECK: t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz1rs 268435456(%rbp,%r14,8), %tmm6
-// CHECK: encoding: [0xc4,0xa5,0x79,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz1rs 268435456(%rbp,%r14,8), %tmm6
-
-// CHECK: t2rpntlvwz1rs 291(%r8,%rax,4), %tmm2
-// CHECK: encoding: [0xc4,0xc5,0x79,0xf8,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz1rs 291(%r8,%rax,4), %tmm2
-
-// CHECK: t2rpntlvwz1rs 64(%rbx), %tmm6
-// CHECK: encoding: [0xc4,0xe5,0x79,0xf8,0x74,0x23,0x40]
- t2rpntlvwz1rs 64(%rbx), %tmm6
-
-// CHECK: t2rpntlvwz1rs -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0xc4,0xe5,0x79,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz1rs -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz1rst1 268435456(%rbp,%r14,8), %tmm6
-// CHECK: encoding: [0xc4,0xa5,0x79,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz1rst1 268435456(%rbp,%r14,8), %tmm6
-
-// CHECK: t2rpntlvwz1rst1 291(%r8,%rax,4), %tmm2
-// CHECK: encoding: [0xc4,0xc5,0x79,0xf9,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz1rst1 291(%r8,%rax,4), %tmm2
-
-// CHECK: t2rpntlvwz1rst1 64(%rbx), %tmm6
-// CHECK: encoding: [0xc4,0xe5,0x79,0xf9,0x74,0x23,0x40]
- t2rpntlvwz1rst1 64(%rbx), %tmm6
-
-// CHECK: t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
-
// CHECK: tileloaddrs 268435456(%rbp,%r14,8), %tmm6
// CHECK: encoding: [0xc4,0xa2,0x7b,0x4a,0xb4,0xf5,0x00,0x00,0x00,0x10]
tileloaddrs 268435456(%rbp,%r14,8), %tmm6
@@ -88,70 +24,6 @@
// CHECK: encoding: [0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff]
tileloaddrst1 -32(,%rbp,2), %tmm3
-// CHECK: t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6
-// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6
-
-// CHECK: t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2
-// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2
-
-// CHECK: t2rpntlvwz0rs 64(%r18), %tmm6
-// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40]
- t2rpntlvwz0rs 64(%r18), %tmm6
-
-// CHECK: {evex} t2rpntlvwz0rs -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz0rs -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6
-// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6
-
-// CHECK: t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2
-// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2
-
-// CHECK: t2rpntlvwz0rst1 64(%r18), %tmm6
-// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40]
- t2rpntlvwz0rst1 64(%r18), %tmm6
-
-// CHECK: {evex} t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6
-// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6
-
-// CHECK: t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2
-// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2
-
-// CHECK: t2rpntlvwz1rs 64(%r18), %tmm6
-// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40]
- t2rpntlvwz1rs 64(%r18), %tmm6
-
-// CHECK: {evex} t2rpntlvwz1rs -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz1rs -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6
-// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6
-
-// CHECK: t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2
-// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2
-
-// CHECK: t2rpntlvwz1rst1 64(%r18), %tmm6
-// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40]
- t2rpntlvwz1rst1 64(%r18), %tmm6
-
-// CHECK: {evex} t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
-
// CHECK: tileloaddrs 291(%r16,%rax,4), %tmm3
// CHECK: encoding: [0x62,0xfa,0x7f,0x08,0x4a,0x9c,0x80,0x23,0x01,0x00,0x00]
tileloaddrs 291(%r16,%rax,4), %tmm3
diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s
index 140d1aa..0e030ca 100755
--- a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s
@@ -1,69 +1,5 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
-// CHECK: t2rpntlvwz0rs tmm6, [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0xc4,0xa5,0x78,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz0rs tmm6, [rbp + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz0rs tmm2, [r8 + 4*rax + 291]
-// CHECK: encoding: [0xc4,0xc5,0x78,0xf8,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz0rs tmm2, [r8 + 4*rax + 291]
-
-// CHECK: t2rpntlvwz0rs tmm6, [rbx + 64]
-// CHECK: encoding: [0xc4,0xe5,0x78,0xf8,0x74,0x23,0x40]
- t2rpntlvwz0rs tmm6, [rbx + 64]
-
-// CHECK: t2rpntlvwz0rs tmm2, [2*rbp - 32]
-// CHECK: encoding: [0xc4,0xe5,0x78,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz0rs tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz0rst1 tmm6, [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0xc4,0xa5,0x78,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz0rst1 tmm6, [rbp + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz0rst1 tmm2, [r8 + 4*rax + 291]
-// CHECK: encoding: [0xc4,0xc5,0x78,0xf9,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz0rst1 tmm2, [r8 + 4*rax + 291]
-
-// CHECK: t2rpntlvwz0rst1 tmm6, [rbx + 64]
-// CHECK: encoding: [0xc4,0xe5,0x78,0xf9,0x74,0x23,0x40]
- t2rpntlvwz0rst1 tmm6, [rbx + 64]
-
-// CHECK: t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz1rs tmm6, [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0xc4,0xa5,0x79,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz1rs tmm6, [rbp + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz1rs tmm2, [r8 + 4*rax + 291]
-// CHECK: encoding: [0xc4,0xc5,0x79,0xf8,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz1rs tmm2, [r8 + 4*rax + 291]
-
-// CHECK: t2rpntlvwz1rs tmm6, [rbx + 64]
-// CHECK: encoding: [0xc4,0xe5,0x79,0xf8,0x74,0x23,0x40]
- t2rpntlvwz1rs tmm6, [rbx + 64]
-
-// CHECK: t2rpntlvwz1rs tmm2, [2*rbp - 32]
-// CHECK: encoding: [0xc4,0xe5,0x79,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz1rs tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz1rst1 tmm6, [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0xc4,0xa5,0x79,0xf9,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz1rst1 tmm6, [rbp + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz1rst1 tmm2, [r8 + 4*rax + 291]
-// CHECK: encoding: [0xc4,0xc5,0x79,0xf9,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz1rst1 tmm2, [r8 + 4*rax + 291]
-
-// CHECK: t2rpntlvwz1rst1 tmm6, [rbx + 64]
-// CHECK: encoding: [0xc4,0xe5,0x79,0xf9,0x74,0x23,0x40]
- t2rpntlvwz1rst1 tmm6, [rbx + 64]
-
-// CHECK: t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
-
// CHECK: tileloaddrs tmm6, [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0xa2,0x7b,0x4a,0xb4,0xf5,0x00,0x00,0x00,0x10]
tileloaddrs tmm6, [rbp + 8*r14 + 268435456]
@@ -96,70 +32,6 @@
// CHECK: encoding: [0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff]
tileloaddrst1 tmm3, [2*rbp - 32]
-// CHECK: t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291]
-// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291]
-
-// CHECK: t2rpntlvwz0rs tmm6, [r18 + 64]
-// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40]
- t2rpntlvwz0rs tmm6, [r18 + 64]
-
-// CHECK: {evex} t2rpntlvwz0rs tmm2, [2*rbp - 32]
-// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz0rs tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291]
-// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291]
-
-// CHECK: t2rpntlvwz0rst1 tmm6, [r18 + 64]
-// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40]
- t2rpntlvwz0rst1 tmm6, [r18 + 64]
-
-// CHECK: {evex} t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291]
-// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291]
-
-// CHECK: t2rpntlvwz1rs tmm6, [r18 + 64]
-// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40]
- t2rpntlvwz1rs tmm6, [r18 + 64]
-
-// CHECK: {evex} t2rpntlvwz1rs tmm2, [2*rbp - 32]
-// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz1rs tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291]
-// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291]
-
-// CHECK: t2rpntlvwz1rst1 tmm6, [r18 + 64]
-// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40]
- t2rpntlvwz1rst1 tmm6, [r18 + 64]
-
-// CHECK: {evex} t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
-
// CHECK: tileloaddrs tmm6, [r16 + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0xba,0x7f,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10]
tileloaddrs tmm6, [r16 + 8*r14 + 268435456]
diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s
index b413597..d1d0997 100644
--- a/llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s
@@ -8,10 +8,3 @@
// CHECK: encoding: [0xc4,0xe2,0x71,0x48,0xda]
tmmultf32ps %tmm1, %tmm2, %tmm3
-// CHECK: ttmmultf32ps %tmm4, %tmm5, %tmm6
-// CHECK: encoding: [0xc4,0xe2,0x58,0x48,0xf5]
- ttmmultf32ps %tmm4, %tmm5, %tmm6
-
-// CHECK: ttmmultf32ps %tmm1, %tmm2, %tmm3
-// CHECK: encoding: [0xc4,0xe2,0x70,0x48,0xda]
- ttmmultf32ps %tmm1, %tmm2, %tmm3
diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s
index 98f5527..b6c0947 100644
--- a/llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s
@@ -8,10 +8,3 @@
// CHECK: encoding: [0xc4,0xe2,0x71,0x48,0xda]
tmmultf32ps tmm3, tmm2, tmm1
-// CHECK: ttmmultf32ps tmm6, tmm5, tmm4
-// CHECK: encoding: [0xc4,0xe2,0x58,0x48,0xf5]
- ttmmultf32ps tmm6, tmm5, tmm4
-
-// CHECK: ttmmultf32ps tmm3, tmm2, tmm1
-// CHECK: encoding: [0xc4,0xe2,0x70,0x48,0xda]
- ttmmultf32ps tmm3, tmm2, tmm1
diff --git a/llvm/test/MC/X86/amx-transpose-att.s b/llvm/test/MC/X86/amx-transpose-att.s
deleted file mode 100644
index 5158470..0000000
--- a/llvm/test/MC/X86/amx-transpose-att.s
+++ /dev/null
@@ -1,153 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
-
-// CHECK: t2rpntlvwz0 268435456(%rbp,%r14,8), %tmm4
-// CHECK: encoding: [0xc4,0xa2,0x78,0x6e,0xa4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz0 268435456(%rbp,%r14,8), %tmm4
-
-// CHECK: t2rpntlvwz0 291(%r8,%rax,4), %tmm2
-// CHECK: encoding: [0xc4,0xc2,0x78,0x6e,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz0 291(%r8,%rax,4), %tmm2
-
-// CHECK: t2rpntlvwz0 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0xc4,0xe2,0x78,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz0 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz0t1 268435456(%rbp,%r14,8), %tmm4
-// CHECK: encoding: [0xc4,0xa2,0x78,0x6f,0xa4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz0t1 268435456(%rbp,%r14,8), %tmm5
-
-// CHECK: t2rpntlvwz0t1 291(%r8,%rax,4), %tmm2
-// CHECK: encoding: [0xc4,0xc2,0x78,0x6f,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz0t1 291(%r8,%rax,4), %tmm2
-
-// CHECK: t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz1 268435456(%rbp,%r14,8), %tmm4
-// CHECK: encoding: [0xc4,0xa2,0x79,0x6e,0xa4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz1 268435456(%rbp,%r14,8), %tmm5
-
-// CHECK: t2rpntlvwz1 291(%r8,%rax,4), %tmm2
-// CHECK: encoding: [0xc4,0xc2,0x79,0x6e,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz1 291(%r8,%rax,4), %tmm2
-
-// CHECK: t2rpntlvwz1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz1 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz1t1 268435456(%rbp,%r14,8), %tmm2
-// CHECK: encoding: [0xc4,0xa2,0x79,0x6f,0x94,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz1t1 268435456(%rbp,%r14,8), %tmm3
-
-// CHECK: t2rpntlvwz1t1 291(%r8,%rax,4), %tmm2
-// CHECK: encoding: [0xc4,0xc2,0x79,0x6f,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz1t1 291(%r8,%rax,4), %tmm2
-
-// CHECK: t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4
-// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4
-
-// CHECK: t2rpntlvwz0 291(%r8,%r17,4), %tmm2
-// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz0 291(%r8,%r17,4), %tmm2
-
-// CHECK: {evex} t2rpntlvwz0 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz0 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4
-// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4
-
-// CHECK: t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2
-// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2
-
-// CHECK: {evex} t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4
-// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4
-
-// CHECK: t2rpntlvwz1 291(%r8,%r17,4), %tmm2
-// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz1 291(%r8,%r17,4), %tmm2
-
-// CHECK: {evex} t2rpntlvwz1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz1 -32(,%rbp,2), %tmm2
-
-// CHECK: t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4
-// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4
-
-// CHECK: t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2
-// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2
-
-// CHECK: {evex} t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
-// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
-
-// CHECK: ttransposed %tmm1, %tmm5
-// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xe9]
- ttransposed %tmm1, %tmm5
-
-// CHECK: ttransposed %tmm2, %tmm3
-// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xda]
- ttransposed %tmm2, %tmm3
-
-// CHECK: ttdpbf16ps %tmm1, %tmm2, %tmm5
-// CHECK: encoding: [0xc4,0xe2,0x72,0x6c,0xea]
- ttdpbf16ps %tmm1, %tmm2, %tmm5
-
-// CHECK: ttdpbf16ps %tmm1, %tmm2, %tmm3
-// CHECK: encoding: [0xc4,0xe2,0x72,0x6c,0xda]
- ttdpbf16ps %tmm1, %tmm2, %tmm3
-
-// CHECK: ttdpfp16ps %tmm3, %tmm4, %tmm5
-// CHECK: encoding: [0xc4,0xe2,0x63,0x6c,0xec]
- ttdpfp16ps %tmm3, %tmm4, %tmm5
-
-// CHECK: ttdpfp16ps %tmm1, %tmm2, %tmm3
-// CHECK: encoding: [0xc4,0xe2,0x73,0x6c,0xda]
- ttdpfp16ps %tmm1, %tmm2, %tmm3
-
-// CHECK: ttcmmimfp16ps %tmm4, %tmm5, %tmm6
-// CHECK: encoding: [0xc4,0xe2,0x5b,0x6b,0xf5]
- ttcmmimfp16ps %tmm4, %tmm5, %tmm6
-
-// CHECK: ttcmmimfp16ps %tmm1, %tmm2, %tmm3
-// CHECK: encoding: [0xc4,0xe2,0x73,0x6b,0xda]
- ttcmmimfp16ps %tmm1, %tmm2, %tmm3
-
-// CHECK: ttcmmrlfp16ps %tmm4, %tmm5, %tmm6
-// CHECK: encoding: [0xc4,0xe2,0x5a,0x6b,0xf5]
- ttcmmrlfp16ps %tmm4, %tmm5, %tmm6
-
-// CHECK: ttcmmrlfp16ps %tmm1, %tmm2, %tmm3
-// CHECK: encoding: [0xc4,0xe2,0x72,0x6b,0xda]
- ttcmmrlfp16ps %tmm1, %tmm2, %tmm3
-
-// CHECK: tconjtcmmimfp16ps %tmm4, %tmm5, %tmm6
-// CHECK: encoding: [0xc4,0xe2,0x58,0x6b,0xf5]
- tconjtcmmimfp16ps %tmm4, %tmm5, %tmm6
-
-// CHECK: tconjtcmmimfp16ps %tmm1, %tmm2, %tmm3
-// CHECK: encoding: [0xc4,0xe2,0x70,0x6b,0xda]
- tconjtcmmimfp16ps %tmm1, %tmm2, %tmm3
-
-// CHECK: tconjtfp16 %tmm5, %tmm6
-// CHECK: encoding: [0xc4,0xe2,0x79,0x6b,0xf5]
- tconjtfp16 %tmm5, %tmm6
-
-// CHECK: tconjtfp16 %tmm2, %tmm3
-// CHECK: encoding: [0xc4,0xe2,0x79,0x6b,0xda]
- tconjtfp16 %tmm2, %tmm3
diff --git a/llvm/test/MC/X86/amx-transpose-intel.s b/llvm/test/MC/X86/amx-transpose-intel.s
deleted file mode 100644
index 0d2c22f..0000000
--- a/llvm/test/MC/X86/amx-transpose-intel.s
+++ /dev/null
@@ -1,153 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
-
-// CHECK: t2rpntlvwz0 tmm6, [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0xc4,0xa2,0x78,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz0 tmm6, [rbp + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz0 tmm2, [r8 + 4*rax + 291]
-// CHECK: encoding: [0xc4,0xc2,0x78,0x6e,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz0 tmm2, [r8 + 4*rax + 291]
-
-// CHECK: t2rpntlvwz0 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0xc4,0xe2,0x78,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz0 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz0t1 tmm6, [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0xc4,0xa2,0x78,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz0t1 tmm7, [rbp + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz0t1 tmm2, [r8 + 4*rax + 291]
-// CHECK: encoding: [0xc4,0xc2,0x78,0x6f,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz0t1 tmm2, [r8 + 4*rax + 291]
-
-// CHECK: t2rpntlvwz0t1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz0t1 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz1 tmm0, [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0xc4,0xa2,0x79,0x6e,0x84,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz1 tmm1, [rbp + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz1 tmm2, [r8 + 4*rax + 291]
-// CHECK: encoding: [0xc4,0xc2,0x79,0x6e,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz1 tmm2, [r8 + 4*rax + 291]
-
-// CHECK: t2rpntlvwz1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0xc4,0xe2,0x79,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz1 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz1t1 tmm6, [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0xc4,0xa2,0x79,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
- t2rpntlvwz1t1 tmm6, [rbp + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz1t1 tmm2, [r8 + 4*rax + 291]
-// CHECK: encoding: [0xc4,0xc2,0x79,0x6f,0x94,0x80,0x23,0x01,0x00,0x00]
- t2rpntlvwz1t1 tmm2, [r8 + 4*rax + 291]
-
-// CHECK: t2rpntlvwz1t1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
- t2rpntlvwz1t1 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291]
-// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291]
-
-// CHECK: {evex} t2rpntlvwz0 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz0 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291]
-// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291]
-
-// CHECK: {evex} t2rpntlvwz0t1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz0t1 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291]
-// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291]
-
-// CHECK: {evex} t2rpntlvwz1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz1 tmm2, [2*rbp - 32]
-
-// CHECK: t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10]
- t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456]
-
-// CHECK: t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291]
-// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00]
- t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291]
-
-// CHECK: {evex} t2rpntlvwz1t1 tmm2, [2*rbp - 32]
-// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
- {evex} t2rpntlvwz1t1 tmm2, [2*rbp - 32]
-
-// CHECK: ttransposed tmm5, tmm1
-// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xe9]
- ttransposed tmm5, tmm1
-
-// CHECK: ttransposed tmm3, tmm2
-// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xda]
- ttransposed tmm3, tmm2
-
-// CHECK: ttdpbf16ps tmm5, tmm0, tmm4
-// CHECK: encoding: [0xc4,0xe2,0x5a,0x6c,0xe8]
- ttdpbf16ps tmm5, tmm0, tmm4
-
-// CHECK: ttdpbf16ps tmm3, tmm2, tmm1
-// CHECK: encoding: [0xc4,0xe2,0x72,0x6c,0xda]
- ttdpbf16ps tmm3, tmm2, tmm1
-
-// CHECK: ttdpfp16ps tmm1, tmm0, tmm4
-// CHECK: encoding: [0xc4,0xe2,0x5b,0x6c,0xc8]
- ttdpfp16ps tmm1, tmm0, tmm4
-
-// CHECK: ttdpfp16ps tmm3, tmm2, tmm1
-// CHECK: encoding: [0xc4,0xe2,0x73,0x6c,0xda]
- ttdpfp16ps tmm3, tmm2, tmm1
-
-// CHECK: ttcmmimfp16ps tmm6, tmm5, tmm4
-// CHECK: encoding: [0xc4,0xe2,0x5b,0x6b,0xf5]
- ttcmmimfp16ps tmm6, tmm5, tmm4
-
-// CHECK: ttcmmimfp16ps tmm3, tmm2, tmm1
-// CHECK: encoding: [0xc4,0xe2,0x73,0x6b,0xda]
- ttcmmimfp16ps tmm3, tmm2, tmm1
-
-// CHECK: ttcmmrlfp16ps tmm6, tmm5, tmm4
-// CHECK: encoding: [0xc4,0xe2,0x5a,0x6b,0xf5]
- ttcmmrlfp16ps tmm6, tmm5, tmm4
-
-// CHECK: ttcmmrlfp16ps tmm3, tmm2, tmm1
-// CHECK: encoding: [0xc4,0xe2,0x72,0x6b,0xda]
- ttcmmrlfp16ps tmm3, tmm2, tmm1
-
-// CHECK: tconjtcmmimfp16ps tmm6, tmm5, tmm4
-// CHECK: encoding: [0xc4,0xe2,0x58,0x6b,0xf5]
- tconjtcmmimfp16ps tmm6, tmm5, tmm4
-
-// CHECK: tconjtcmmimfp16ps tmm3, tmm2, tmm1
-// CHECK: encoding: [0xc4,0xe2,0x70,0x6b,0xda]
- tconjtcmmimfp16ps tmm3, tmm2, tmm1
-
-// CHECK: tconjtfp16 tmm6, tmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x6b,0xf5]
- tconjtfp16 tmm6, tmm5
-
-// CHECK: tconjtfp16 tmm3, tmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x6b,0xda]
- tconjtfp16 tmm3, tmm2
diff --git a/llvm/test/TableGen/x86-instr-mapping.inc b/llvm/test/TableGen/x86-instr-mapping.inc
index f621979..6d2873e 100644
--- a/llvm/test/TableGen/x86-instr-mapping.inc
+++ b/llvm/test/TableGen/x86-instr-mapping.inc
@@ -167,14 +167,6 @@ static const X86TableEntry X86CompressEVEXTable[] = {
{ X86::SHRX64rm_EVEX, X86::SHRX64rm },
{ X86::SHRX64rr_EVEX, X86::SHRX64rr },
{ X86::STTILECFG_EVEX, X86::STTILECFG },
- { X86::T2RPNTLVWZ0RST1_EVEX, X86::T2RPNTLVWZ0RST1 },
- { X86::T2RPNTLVWZ0RS_EVEX, X86::T2RPNTLVWZ0RS },
- { X86::T2RPNTLVWZ0T1_EVEX, X86::T2RPNTLVWZ0T1 },
- { X86::T2RPNTLVWZ0_EVEX, X86::T2RPNTLVWZ0 },
- { X86::T2RPNTLVWZ1RST1_EVEX, X86::T2RPNTLVWZ1RST1 },
- { X86::T2RPNTLVWZ1RS_EVEX, X86::T2RPNTLVWZ1RS },
- { X86::T2RPNTLVWZ1T1_EVEX, X86::T2RPNTLVWZ1T1 },
- { X86::T2RPNTLVWZ1_EVEX, X86::T2RPNTLVWZ1 },
{ X86::TILELOADDRST1_EVEX, X86::TILELOADDRST1 },
{ X86::TILELOADDRS_EVEX, X86::TILELOADDRS },
{ X86::TILELOADDT1_EVEX, X86::TILELOADDT1 },
diff --git a/llvm/test/Transforms/FixIrreducible/bug45623.ll b/llvm/test/Transforms/FixIrreducible/bug45623.ll
index 5872443..b6dd6fb 100644
--- a/llvm/test/Transforms/FixIrreducible/bug45623.ll
+++ b/llvm/test/Transforms/FixIrreducible/bug45623.ll
@@ -90,3 +90,112 @@ for.end626: ; preds = %for.cond616
if.else629: ; preds = %backtrack
br label %retry
}
+
+define void @tre_tnfa_run_backtrack_callbr(i1 %arg) {
+; CHECK-LABEL: @tre_tnfa_run_backtrack_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETRY:%.*]] []
+; CHECK: retry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG:%.*]])
+; CHECK-NEXT: to label [[RETRY_TARGET_BACKTRACK:%.*]] [label %retry.target.while.body248]
+; CHECK: while.body248:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[IF_THEN250:%.*]] [label %if.end275]
+; CHECK: if.then250:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND264:%.*]] []
+; CHECK: for.cond264:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[FOR_BODY267:%.*]] [label %backtrack]
+; CHECK: for.body267:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND264]] []
+; CHECK: if.end275:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND342:%.*]] []
+; CHECK: for.cond342:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[FOR_BODY345:%.*]] [label %for.end580]
+; CHECK: for.body345:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND342]] []
+; CHECK: for.end580:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[BACKTRACK:%.*]] []
+; CHECK: backtrack:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[IF_THEN595:%.*]] [label %if.else629]
+; CHECK: if.then595:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND616:%.*]] []
+; CHECK: for.cond616:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[FOR_BODY619:%.*]] [label %for.end626]
+; CHECK: for.body619:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND616]] []
+; CHECK: for.end626:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_END626_TARGET_WHILE_BODY248:%.*]] []
+; CHECK: if.else629:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETRY]] []
+; CHECK: for.end626.target.while.body248:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: retry.target.backtrack:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: retry.target.while.body248:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_WHILE_BODY248:%.*]] = phi i1 [ true, [[FOR_END626_TARGET_WHILE_BODY248]] ], [ false, [[RETRY_TARGET_BACKTRACK]] ], [ true, [[RETRY_TARGET_WHILE_BODY248:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_WHILE_BODY248]], label [[WHILE_BODY248:%.*]], label [[BACKTRACK]]
+;
+entry:
+ callbr void asm "", ""() to label %retry []
+
+retry:
+ callbr void asm "", "r,!i"(i1 %arg) to label %backtrack [label %while.body248]
+
+while.body248: ; preds = %for.end626, %retry
+ callbr void asm "", "r,!i"(i1 %arg) to label %if.then250 [label %if.end275]
+
+if.then250: ; preds = %while.body248
+ callbr void asm "", ""() to label %for.cond264 []
+
+for.cond264: ; preds = %for.body267, %if.then250
+ callbr void asm "", "r,!i"(i1 %arg) to label %for.body267 [label %backtrack]
+
+for.body267: ; preds = %for.cond264
+ callbr void asm "", ""() to label %for.cond264 []
+
+if.end275: ; preds = %while.body248
+ callbr void asm "", ""() to label %for.cond342 []
+
+for.cond342: ; preds = %for.body345, %if.end275
+ callbr void asm "", "r,!i"(i1 %arg) to label %for.body345 [label %for.end580]
+
+for.body345: ; preds = %for.cond342
+ callbr void asm "", ""() to label %for.cond342 []
+
+for.end580: ; preds = %for.cond342
+ callbr void asm "", ""() to label %backtrack []
+
+backtrack: ; preds = %for.end580, %for.cond264, %retry
+ callbr void asm "", "r,!i"(i1 %arg) to label %if.then595 [label %if.else629]
+
+if.then595: ; preds = %backtrack
+ callbr void asm "", ""() to label %for.cond616 []
+
+for.cond616: ; preds = %for.body619, %if.then595
+ callbr void asm "", "r,!i"(i1 %arg) to label %for.body619 [label %for.end626]
+
+for.body619: ; preds = %for.cond616
+ callbr void asm "", ""() to label %for.cond616 []
+
+for.end626: ; preds = %for.cond616
+ callbr void asm "", ""() to label %while.body248 []
+
+if.else629: ; preds = %backtrack
+ callbr void asm "", ""() to label %retry []
+}
diff --git a/llvm/test/Transforms/FixIrreducible/callbr.ll b/llvm/test/Transforms/FixIrreducible/callbr.ll
new file mode 100644
index 0000000..26ca6c7
--- /dev/null
+++ b/llvm/test/Transforms/FixIrreducible/callbr.ll
@@ -0,0 +1,869 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes='fix-irreducible,verify<loops>' -S | FileCheck %s
+; RUN: opt < %s -passes='verify<loops>,fix-irreducible,verify<loops>' -S | FileCheck %s
+; RUN: opt < %s -passes='print<cycles>' -disable-output 2>&1 | FileCheck %s --check-prefix CYCLES-BEFORE
+; RUN: opt < %s -passes='fix-irreducible,print<cycles>' -disable-output 2>&1 | FileCheck %s --check-prefix CYCLES-AFTER
+
+; CYCLES-BEFORE: CycleInfo for function: callbr_entry
+; CYCLES-BEFORE-NEXT: depth=1: entries(indirect fallthrough)
+; CYCLES-AFTER: CycleInfo for function: callbr_entry
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) indirect fallthrough
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_entry_targets_with_phi_nodes
+; CYCLES-BEFORE-NEXT: depth=1: entries(block1 block)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_entry_targets_with_phi_nodes
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) block1 block
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_entry_multiple_indirect_targets
+; CYCLES-BEFORE-NEXT: depth=1: entries(indirect fallthrough)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_entry_multiple_indirect_targets
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) indirect fallthrough
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_entry_multiple_indirect_targets1
+; CYCLES-BEFORE-NEXT: depth=1: entries(indirect1 indirect fallthrough)
+; CYCLES-BEFORE-NEXT: depth=2: entries(indirect fallthrough)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_entry_multiple_indirect_targets1
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) indirect1 indirect fallthrough irr.guard1 irr.guard2
+; CYCLES-AFTER-NEXT: depth=2: entries(irr.guard2) indirect fallthrough
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_header_no_indirect
+; CYCLES-BEFORE-NEXT: depth=1: entries(fallthrough callbr)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_header_no_indirect
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) fallthrough callbr callbr.target.fallthrough
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_header
+; CYCLES-BEFORE-NEXT: depth=1: entries(fallthrough callbr)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_header
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) fallthrough callbr callbr.target.fallthrough
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_header_multiple_indirect_targets
+; CYCLES-BEFORE-NEXT: depth=1: entries(fallthrough callbr) indirect1
+; CYCLES-BEFORE-NEXT: depth=2: entries(callbr) indirect1
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_header_multiple_indirect_targets
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) fallthrough callbr indirect1 callbr.target.fallthrough
+; CYCLES-AFTER-NEXT: depth=2: entries(callbr) indirect1
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_regular
+; CYCLES-BEFORE-NEXT: depth=1: entries(fallthrough2 fallthrough1)
+; CYCLES-BEFORE-NEXT: depth=1: entries(indirect2 indirect1)
+; CYCLES-BEFORE-NEXT: depth=1: entries(nocallbr2 nocallbr1)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_regular
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) fallthrough2 fallthrough1
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard1) indirect2 indirect1
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard2) nocallbr2 nocallbr1
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_regular1
+; CYCLES-BEFORE-NEXT: depth=1: entries(callbr nocallbr)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_regular1
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) callbr nocallbr
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_regular2
+; CYCLES-BEFORE-NEXT: depth=1: entries(callbr nocallbr)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_regular2
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) callbr nocallbr
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_header_and_regular
+; CYCLES-BEFORE-NEXT: depth=1: entries(callbr_header) callbr_regular mid
+; CYCLES-BEFORE-NEXT: depth=2: entries(callbr_regular mid)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_header_and_regular
+; CYCLES-AFTER-NEXT: depth=1: entries(callbr_header) callbr_regular mid callbr_header.target.mid callbr_header.target.callbr_regular irr.guard
+; CYCLES-AFTER-NEXT: depth=2: entries(irr.guard) callbr_regular mid
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_only
+; CYCLES-BEFORE-NEXT: depth=1: entries(callbr_block callbr_header)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_only
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) callbr_block callbr_header callbr_header.target.callbr_block
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: entry_multiple_callbr
+; CYCLES-BEFORE-NEXT: depth=1: entries(cb2 block block1)
+; CYCLES-BEFORE-NEXT: depth=2: entries(block block1)
+; CYCLES-AFTER-NEXT: CycleInfo for function: entry_multiple_callbr
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) cb2 block block1 irr.guard1 cb2.target.block1 cb2.target.block irr.guard2
+; CYCLES-AFTER-NEXT: depth=2: entries(irr.guard2) block block1
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_exit_with_separate_entries
+; CYCLES-BEFORE-NEXT: depth=1: entries(l2 l1) cb
+; CYCLES-BEFORE-NEXT: depth=2: entries(l1 cb)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_exit_with_separate_entries
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) l2 l1 cb cb.target.l1 irr.guard1
+; CYCLES-AFTER-NEXT: depth=2: entries(irr.guard1) l1 cb cb.target.l1
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_exit_with_separate_entries1
+; CYCLES-BEFORE-NEXT: depth=1: entries(loop2 loop1) cb
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_exit_with_separate_entries1
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) loop2 loop1 cb cb.target.loop2
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_only_multiple
+; CYCLES-BEFORE-NEXT: depth=1: entries(cb3 cb1 cb2)
+; CYCLES-BEFORE-NEXT: depth=2: entries(cb1 cb2)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_only_multiple
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) cb3 cb1 cb2 cb2.target.cb3 cb1.target.cb3 irr.guard1 cb2.target.cb1 cb3.target.cb1 irr.guard2
+; CYCLES-AFTER-NEXT: depth=2: entries(irr.guard2) cb1 cb2 cb2.target.cb1
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_bypass
+; CYCLES-BEFORE-NEXT: depth=1: entries(l1 cb) l2
+; CYCLES-BEFORE-NEXT: depth=2: entries(cb l2)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_bypass
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) l1 cb l2 cb.target.l1 irr.guard1
+; CYCLES-AFTER-NEXT: depth=2: entries(irr.guard1) cb l2
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_multiple_with_exit
+; CYCLES-BEFORE-NEXT: depth=1: entries(l3 l1 l2)
+; CYCLES-BEFORE-NEXT: depth=2: entries(l1 l2)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_multiple_with_exit
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) l3 l1 l2 irr.guard1 irr.guard2
+; CYCLES-AFTER-NEXT: depth=2: entries(irr.guard2) l1 l2
+
+; CYCLES-BEFORE-NEXT: CycleInfo for function: callbr_nested
+; CYCLES-BEFORE-NEXT: depth=1: entries(bb bh)
+; CYCLES-BEFORE-NEXT: depth=1: entries(b h)
+; CYCLES-AFTER-NEXT: CycleInfo for function: callbr_nested
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard) bb bh
+; CYCLES-AFTER-NEXT: depth=1: entries(irr.guard1) b h
+
+; Fix the irreducible loop in which callbr is the entry (see description at the
+; top of FixIrreducible.cpp).
+define void @callbr_entry(i1 %c) {
+; CHECK-LABEL: define void @callbr_entry(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %callbr.target.indirect]
+; CHECK: [[FALLTHROUGH:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD:.*]], label %[[RET:.*]]
+; CHECK: [[INDIRECT:.*]]:
+; CHECK-NEXT: br label %[[FALLTHROUGH]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_TARGET_INDIRECT:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_INDIRECT:%.*]] = phi i1 [ true, %[[FALLTHROUGH]] ], [ false, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ true, %[[CALLBR_TARGET_INDIRECT]] ]
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT]], label %[[INDIRECT]], label %[[FALLTHROUGH]]
+;
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+fallthrough:
+ br i1 %c, label %indirect, label %ret
+indirect:
+ br label %fallthrough
+ret:
+ ret void
+}
+
+define i32 @callbr_entry_targets_with_phi_nodes(i1 %c) {
+; CHECK-LABEL: define i32 @callbr_entry_targets_with_phi_nodes(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[ENTRY_TARGET_BLOCK:.*]] [label %entry.target.block1]
+; CHECK: [[BLOCK:.*]]:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1, %[[BLOCK1:.*]] ], [ [[A_MOVED:%.*]], %[[IRR_GUARD:.*]] ]
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[BLOCK1]]:
+; CHECK-NEXT: br i1 [[C]], label %[[BLOCK]], label %[[RET:.*]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret i32 [[B_MOVED:%.*]]
+; CHECK: [[ENTRY_TARGET_BLOCK]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_BLOCK1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[A_MOVED]] = phi i32 [ poison, %[[BLOCK]] ], [ 42, %[[ENTRY_TARGET_BLOCK]] ], [ poison, %[[ENTRY_TARGET_BLOCK1]] ]
+; CHECK-NEXT: [[B_MOVED]] = phi i32 [ [[A]], %[[BLOCK]] ], [ poison, %[[ENTRY_TARGET_BLOCK]] ], [ 43, %[[ENTRY_TARGET_BLOCK1]] ]
+; CHECK-NEXT: [[GUARD_BLOCK1:%.*]] = phi i1 [ true, %[[BLOCK]] ], [ false, %[[ENTRY_TARGET_BLOCK]] ], [ true, %[[ENTRY_TARGET_BLOCK1]] ]
+; CHECK-NEXT: br i1 [[GUARD_BLOCK1]], label %[[BLOCK1]], label %[[BLOCK]]
+;
+entry:
+ callbr void asm "", "!i"() to label %block [label %block1]
+block:
+ %a = phi i32 [42, %entry], [1, %block1]
+ br label %block1
+block1:
+ %b = phi i32 [43, %entry], [%a, %block]
+ br i1 %c, label %block, label %ret
+ret:
+ ret i32 %b
+}
+
+define void @callbr_entry_multiple_indirect_targets(i1 %c) {
+; CHECK-LABEL: define void @callbr_entry_multiple_indirect_targets(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i,!i,!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %[[CALLBR_TARGET_INDIRECT:.*]], label %[[INDIRECT1:.*]], label %indirect2]
+; CHECK: [[INDIRECT3:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD:.*]], label %[[RET:.*]]
+; CHECK: [[INDIRECT:.*]]:
+; CHECK-NEXT: br label %[[INDIRECT3]]
+; CHECK: [[INDIRECT1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[INDIRECT2:.*:]]
+; CHECK-NEXT: br label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_TARGET_INDIRECT]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_INDIRECT:%.*]] = phi i1 [ true, %[[INDIRECT3]] ], [ true, %[[INDIRECT1]] ], [ false, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ true, %[[CALLBR_TARGET_INDIRECT]] ]
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT]], label %[[INDIRECT]], label %[[INDIRECT3]]
+;
+callbr:
+ callbr void asm "", "!i,!i,!i"() to label %fallthrough [label %indirect, label %indirect1, label %indirect2]
+fallthrough:
+ br i1 %c, label %indirect, label %ret
+indirect:
+ br label %fallthrough
+indirect1:
+ br label %indirect
+indirect2:
+ br label %ret
+ret:
+ ret void
+}
+
+define void @callbr_entry_multiple_indirect_targets1(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_entry_multiple_indirect_targets1(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i,!i,!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %[[CALLBR_TARGET_INDIRECT:.*]], label %[[CALLBR_TARGET_INDIRECT1:.*]], label %indirect2]
+; CHECK: [[INDIRECT3:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD2:.*]], label %[[RET:.*]]
+; CHECK: [[INDIRECT:.*]]:
+; CHECK-NEXT: br i1 [[D]], label %[[INDIRECT3]], label %[[IRR_GUARD:.*]]
+; CHECK: [[INDIRECT1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[INDIRECT2:.*:]]
+; CHECK-NEXT: br label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_TARGET_INDIRECT]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_TARGET_INDIRECT1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_INDIRECT1:%.*]] = phi i1 [ true, %[[INDIRECT]] ], [ false, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ false, %[[CALLBR_TARGET_INDIRECT]] ], [ true, %[[CALLBR_TARGET_INDIRECT1]] ]
+; CHECK-NEXT: [[GUARD_FALLTHROUGH:%.*]] = phi i1 [ false, %[[INDIRECT]] ], [ true, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ false, %[[CALLBR_TARGET_INDIRECT]] ], [ false, %[[CALLBR_TARGET_INDIRECT1]] ]
+; CHECK-NEXT: [[GUARD_FALLTHROUGH_INV:%.*]] = xor i1 [[GUARD_FALLTHROUGH]], true
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT1]], label %[[INDIRECT1]], label %[[IRR_GUARD1:.*]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_INDIRECT:%.*]] = phi i1 [ true, %[[INDIRECT3]] ], [ [[GUARD_FALLTHROUGH_INV]], %[[IRR_GUARD1]] ], [ true, %[[INDIRECT1]] ]
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT]], label %[[INDIRECT]], label %[[INDIRECT3]]
+;
+callbr:
+ callbr void asm "", "!i,!i,!i"() to label %fallthrough [label %indirect, label %indirect1, label %indirect2]
+fallthrough:
+ br i1 %c, label %indirect, label %ret
+indirect:
+ br i1 %d, label %fallthrough, label %indirect1
+indirect1:
+ br label %indirect
+indirect2:
+ br label %ret
+ret:
+ ret void
+}
+
+; Fix the irreducible loop in which callbr is the header (see the example at the
+; top of FixIrreducible.cpp).
+define void @callbr_header_no_indirect(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_header_no_indirect(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[D_INV:%.*]] = xor i1 [[D]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] []
+; CHECK: [[FALLTHROUGH:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[CALLBR]], label %[[RET:.*]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_FALLTHROUGH:%.*]] = phi i1 [ true, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ [[D_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_FALLTHROUGH]], label %[[FALLTHROUGH]], label %[[CALLBR]]
+;
+ br i1 %d, label %callbr, label %fallthrough
+callbr:
+ callbr void asm "", ""() to label %fallthrough []
+fallthrough:
+ br i1 %c, label %callbr, label %ret
+ret:
+ ret void
+}
+
+; Fix the irreducible loop in which callbr is the header.
+define void @callbr_header(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_header(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[D_INV:%.*]] = xor i1 [[D]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %indirect]
+; CHECK: [[INDIRECT:.*:]]
+; CHECK-NEXT: br label %[[RET:.*]]
+; CHECK: [[FALLTHROUGH:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[CALLBR]], label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_FALLTHROUGH:%.*]] = phi i1 [ true, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ [[D_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_FALLTHROUGH]], label %[[FALLTHROUGH]], label %[[CALLBR]]
+;
+ br i1 %d, label %callbr, label %fallthrough
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+indirect:
+ br label %ret
+fallthrough:
+ br i1 %c, label %callbr, label %ret
+ret:
+ ret void
+}
+
+define void @callbr_header_multiple_indirect_targets(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_header_multiple_indirect_targets(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[D_INV:%.*]] = xor i1 [[D]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i,!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %[[INDIRECT1:.*]], label %indirect1]
+; CHECK: [[INDIRECT1]]:
+; CHECK-NEXT: br label %[[RET:.*]]
+; CHECK: [[INDIRECT2:.*:]]
+; CHECK-NEXT: br label %[[CALLBR]]
+; CHECK: [[FALLTHROUGH:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[CALLBR]], label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_FALLTHROUGH:%.*]] = phi i1 [ true, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ [[D_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_FALLTHROUGH]], label %[[FALLTHROUGH]], label %[[CALLBR]]
+;
+ br i1 %d, label %callbr, label %fallthrough
+callbr:
+ callbr void asm "", "!i,!i"() to label %fallthrough [label %indirect, label %indirect1]
+indirect:
+ br label %ret
+indirect1:
+ br label %callbr
+fallthrough:
+ br i1 %c, label %callbr, label %ret
+ret:
+ ret void
+}
+
+; Fix the three usual irreducible loops (callbr isn't a part of one of them):
+; - fallthrough, fallthrough1, fallthrough2
+; - indirect, indirect1, indirect2
+; - nocallbr, nocallbr1, nocallbr2
+define void @callbr_regular(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_regular(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: br i1 [[D]], label %[[CALLBR:.*]], label %[[NOCALLBR:.*]]
+; CHECK: [[CALLBR]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[FALLTHROUGH:.*]] [label %indirect]
+; CHECK: [[FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[FALLTHROUGH1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[FALLTHROUGH2:.*]]:
+; CHECK-NEXT: br i1 [[D]], label %[[FALLTHROUGH1]], label %[[RET:.*]]
+; CHECK: [[INDIRECT:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1:.*]]
+; CHECK: [[INDIRECT1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1]]
+; CHECK: [[INDIRECT2:.*]]:
+; CHECK-NEXT: br i1 [[D]], label %[[INDIRECT1]], label %[[RET]]
+; CHECK: [[NOCALLBR]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2:.*]]
+; CHECK: [[NOCALLBR1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[NOCALLBR2:.*]]:
+; CHECK-NEXT: br i1 [[D]], label %[[NOCALLBR1]], label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_FALLTHROUGH2:%.*]] = phi i1 [ true, %[[FALLTHROUGH1]] ], [ [[C_INV]], %[[FALLTHROUGH]] ]
+; CHECK-NEXT: br i1 [[GUARD_FALLTHROUGH2]], label %[[FALLTHROUGH2]], label %[[FALLTHROUGH1]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: [[GUARD_INDIRECT2:%.*]] = phi i1 [ true, %[[INDIRECT1]] ], [ [[C_INV]], %[[INDIRECT]] ]
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT2]], label %[[INDIRECT2]], label %[[INDIRECT1]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_NOCALLBR2:%.*]] = phi i1 [ true, %[[NOCALLBR1]] ], [ [[C_INV]], %[[NOCALLBR]] ]
+; CHECK-NEXT: br i1 [[GUARD_NOCALLBR2]], label %[[NOCALLBR2]], label %[[NOCALLBR1]]
+;
+ br i1 %d, label %callbr, label %nocallbr
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+fallthrough:
+ br i1 %c, label %fallthrough1, label %fallthrough2
+fallthrough1:
+ br label %fallthrough2
+fallthrough2:
+ br i1 %d, label %fallthrough1, label %ret
+indirect:
+ br i1 %c, label %indirect1, label %indirect2
+indirect1:
+ br label %indirect2
+indirect2:
+ br i1 %d, label %indirect1, label %ret
+nocallbr:
+ br i1 %c, label %nocallbr1, label %nocallbr2
+nocallbr1:
+ br label %nocallbr2
+nocallbr2:
+ br i1 %d, label %nocallbr1, label %ret
+ret:
+ ret void
+}
+
+; Fix an irreducible loop in which callbr is a regular block (neither entry nor
+; header). See the example at the top of FixIrreducible.cpp.
+define void @callbr_regular1(i1 %c) {
+; CHECK-LABEL: define void @callbr_regular1(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[NOCALLBR:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[RET:.*]] [label %nocallbr]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CALLBR:%.*]] = phi i1 [ true, %[[NOCALLBR]] ], [ [[C_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_CALLBR]], label %[[CALLBR]], label %[[NOCALLBR]]
+;
+ br i1 %c, label %nocallbr, label %callbr
+nocallbr:
+ br label %callbr
+callbr:
+ callbr void asm "", "!i"() to label %ret [label %nocallbr]
+ret:
+ ret void
+}
+
+; Fix an irreducible loop in which callbr is a regular block (neither entry nor
+; header). See the example at the top of FixIrreducible.cpp.
+define void @callbr_regular2(i1 %c) {
+; CHECK-LABEL: define void @callbr_regular2(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[NOCALLBR:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[NOCALLBR]] [label %ret]
+; CHECK: [[RET:.*:]]
+; CHECK-NEXT: ret void
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CALLBR:%.*]] = phi i1 [ true, %[[NOCALLBR]] ], [ [[C_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_CALLBR]], label %[[CALLBR]], label %[[NOCALLBR]]
+;
+ br i1 %c, label %nocallbr, label %callbr
+nocallbr:
+ br label %callbr
+callbr:
+ callbr void asm "", "!i"() to label %nocallbr [label %ret]
+ret:
+ ret void
+}
+
+; Fix an irreducible loop with two callbr blocks, one as header and one as regular block.
+define void @callbr_header_and_regular(i1 %c) {
+; CHECK-LABEL: define void @callbr_header_and_regular(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: br label %[[CALLBR_HEADER:.*]]
+; CHECK: [[CALLBR_HEADER]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_HEADER_TARGET_MID:.*]] [label %callbr_header.target.callbr_regular]
+; CHECK: [[MID:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD:.*]], label %[[RET:.*]]
+; CHECK: [[CALLBR_REGULAR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_HEADER]] [label %mid]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_HEADER_TARGET_MID]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_HEADER_TARGET_CALLBR_REGULAR:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CALLBR_REGULAR:%.*]] = phi i1 [ true, %[[MID]] ], [ false, %[[CALLBR_HEADER_TARGET_MID]] ], [ true, %[[CALLBR_HEADER_TARGET_CALLBR_REGULAR]] ]
+; CHECK-NEXT: br i1 [[GUARD_CALLBR_REGULAR]], label %[[CALLBR_REGULAR]], label %[[MID]]
+;
+ br label %callbr_header
+callbr_header:
+ callbr void asm "", "!i"() to label %mid [label %callbr_regular]
+mid:
+ br i1 %c, label %callbr_regular, label %ret
+callbr_regular:
+ callbr void asm "", "!i"() to label %callbr_header [label %mid]
+ret:
+ ret void
+}
+
+; Fix an irreducible loop consisting only of callbr blocks (and ret). See the
+; example at the top of FixIrreducible.cpp.
+define void @callbr_only(i1 %c) {
+; CHECK-LABEL: define void @callbr_only(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_ENTRY_TARGET_CALLBR_HEADER:.*]] [label %callbr_entry.target.callbr_block]
+; CHECK: [[CALLBR_HEADER:.*]]:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label %[[CALLBR_HEADER_TARGET_CALLBR_BLOCK:.*]] []
+; CHECK: [[CALLBR_BLOCK:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_HEADER]] [label %ret]
+; CHECK: [[RET:.*:]]
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_HEADER_TARGET_CALLBR_BLOCK]]:
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CALLBR_ENTRY_TARGET_CALLBR_HEADER]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_ENTRY_TARGET_CALLBR_BLOCK:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CALLBR_BLOCK:%.*]] = phi i1 [ true, %[[CALLBR_HEADER_TARGET_CALLBR_BLOCK]] ], [ false, %[[CALLBR_ENTRY_TARGET_CALLBR_HEADER]] ], [ true, %[[CALLBR_ENTRY_TARGET_CALLBR_BLOCK]] ]
+; CHECK-NEXT: br i1 [[GUARD_CALLBR_BLOCK]], label %[[CALLBR_BLOCK]], label %[[CALLBR_HEADER]]
+;
+callbr_entry:
+ callbr void asm "", "!i"() to label %callbr_header [label %callbr_block]
+callbr_header:
+ callbr void asm "", ""() to label %callbr_block []
+callbr_block:
+ callbr void asm "", "!i"() to label %callbr_header [label %ret]
+ret:
+ ret void
+}
+
+; Irreducible loop: entry leading to multiple callbr blocks.
+define void @entry_multiple_callbr(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @entry_multiple_callbr(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[A]], label %[[CB1:.*]], label %[[IRR_GUARD:.*]]
+; CHECK: [[CB1]]:
+; CHECK-NEXT: callbr void asm "", "!i,!i"()
+; CHECK-NEXT: to label %[[CB1_TARGET_BLOCK:.*]] [label %[[CB1_TARGET_CB2:.*]], label %cb1.target.block1]
+; CHECK: [[BLOCK:.*]]:
+; CHECK-NEXT: br i1 [[B]], label %[[IRR_GUARD]], label %[[BLOCK1:.*]]
+; CHECK: [[CB2:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CB2_TARGET_BLOCK1:.*]] [label %cb2.target.block]
+; CHECK: [[BLOCK1]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD2:.*]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CB1_TARGET_BLOCK]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CB1_TARGET_CB2]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CB1_TARGET_BLOCK1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CB2:%.*]] = phi i1 [ true, %[[BLOCK]] ], [ false, %[[CB1_TARGET_BLOCK]] ], [ true, %[[CB1_TARGET_CB2]] ], [ false, %[[CB1_TARGET_BLOCK1]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: [[GUARD_BLOCK:%.*]] = phi i1 [ false, %[[BLOCK]] ], [ true, %[[CB1_TARGET_BLOCK]] ], [ false, %[[CB1_TARGET_CB2]] ], [ false, %[[CB1_TARGET_BLOCK1]] ], [ false, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[GUARD_CB2]], label %[[CB2]], label %[[IRR_GUARD1:.*]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[CB2_TARGET_BLOCK1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[CB2_TARGET_BLOCK:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_BLOCK3:%.*]] = phi i1 [ true, %[[BLOCK1]] ], [ [[GUARD_BLOCK]], %[[IRR_GUARD1]] ], [ false, %[[CB2_TARGET_BLOCK1]] ], [ true, %[[CB2_TARGET_BLOCK]] ]
+; CHECK-NEXT: br i1 [[GUARD_BLOCK3]], label %[[BLOCK]], label %[[BLOCK1]]
+;
+entry:
+ br i1 %a, label %cb1, label %cb2
+cb1:
+ callbr void asm "", "!i,!i"() to label %block [label %cb2, label %block1]
+block:
+ br i1 %b, label %cb2, label %block1
+cb2:
+ callbr void asm "", "!i"() to label %block1 [label %block]
+block1:
+ br i1 %c, label %block, label %exit
+exit:
+ ret void
+}
+
+; Irreducible loop: callbr as loop exit, with multiple entries
+define void @callbr_exit_with_separate_entries(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @callbr_exit_with_separate_entries(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: [[A_INV:%.*]] = xor i1 [[A]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[L1:.*]]:
+; CHECK-NEXT: br i1 [[B]], label %[[CB:.*]], label %[[IRR_GUARD]]
+; CHECK: [[L2:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1:.*]]
+; CHECK: [[CB]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[EXIT:.*]] [label %cb.target.l1]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_L2:%.*]] = phi i1 [ true, %[[L1]] ], [ [[A_INV]], %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[GUARD_L2]], label %[[L2]], label %[[IRR_GUARD1]]
+; CHECK: [[CB_TARGET_L1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: [[GUARD_L1:%.*]] = phi i1 [ true, %[[CB_TARGET_L1]] ], [ true, %[[IRR_GUARD]] ], [ [[C_INV]], %[[L2]] ]
+; CHECK-NEXT: br i1 [[GUARD_L1]], label %[[L1]], label %[[CB]]
+;
+entry:
+ br i1 %a, label %l1, label %l2
+l1:
+ br i1 %b, label %cb, label %l2
+l2:
+ br i1 %c, label %cb, label %l1
+cb:
+ callbr void asm "", "!i"() to label %exit [label %l1]
+exit:
+ ret void
+}
+
+define void @callbr_exit_with_separate_entries1(i1 %a, i1 %b) {
+; CHECK-LABEL: define void @callbr_exit_with_separate_entries1(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[A_INV:%.*]] = xor i1 [[A]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[LOOP1:.*]]:
+; CHECK-NEXT: br i1 [[B]], label %[[CB:.*]], label %[[IRR_GUARD]]
+; CHECK: [[LOOP2:.*]]:
+; CHECK-NEXT: br label %[[LOOP1]]
+; CHECK: [[CB]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[EXIT:.*]] [label %cb.target.loop2]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CB_TARGET_LOOP2:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_LOOP2:%.*]] = phi i1 [ true, %[[CB_TARGET_LOOP2]] ], [ true, %[[LOOP1]] ], [ [[A_INV]], %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[GUARD_LOOP2]], label %[[LOOP2]], label %[[LOOP1]]
+;
+entry:
+ br i1 %a, label %loop1, label %loop2
+loop1:
+ br i1 %b, label %cb, label %loop2
+loop2:
+ br label %loop1
+cb:
+ callbr void asm "", "!i"() to label %exit [label %loop2]
+exit:
+ ret void
+}
+
+; Irreducible loop: all blocks are callbrs, with cross-edges
+define void @callbr_only_multiple(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @callbr_only_multiple(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i,!i"()
+; CHECK-NEXT: to label %[[ENTRY_TARGET_CB1:.*]] [label %[[ENTRY_TARGET_CB2:.*]], label %entry.target.cb3]
+; CHECK: [[CB1:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CB2:.*]] [label %cb1.target.cb3]
+; CHECK: [[CB2]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CB2_TARGET_CB3:.*]] [label %cb2.target.cb1]
+; CHECK: [[CB3:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CB3_TARGET_CB1:.*]] [label %exit]
+; CHECK: [[EXIT:.*:]]
+; CHECK-NEXT: ret void
+; CHECK: [[CB2_TARGET_CB3]]:
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CB1_TARGET_CB3:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_CB1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_CB2]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_CB3:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CB3:%.*]] = phi i1 [ true, %[[CB2_TARGET_CB3]] ], [ true, %[[CB1_TARGET_CB3]] ], [ false, %[[ENTRY_TARGET_CB1]] ], [ false, %[[ENTRY_TARGET_CB2]] ], [ true, %[[ENTRY_TARGET_CB3]] ]
+; CHECK-NEXT: [[GUARD_CB1:%.*]] = phi i1 [ false, %[[CB2_TARGET_CB3]] ], [ false, %[[CB1_TARGET_CB3]] ], [ true, %[[ENTRY_TARGET_CB1]] ], [ false, %[[ENTRY_TARGET_CB2]] ], [ false, %[[ENTRY_TARGET_CB3]] ]
+; CHECK-NEXT: br i1 [[GUARD_CB3]], label %[[CB3]], label %[[IRR_GUARD1:.*]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2:.*]]
+; CHECK: [[CB2_TARGET_CB1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[CB3_TARGET_CB1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_CB13:%.*]] = phi i1 [ true, %[[CB2_TARGET_CB1]] ], [ [[GUARD_CB1]], %[[IRR_GUARD1]] ], [ true, %[[CB3_TARGET_CB1]] ]
+; CHECK-NEXT: br i1 [[GUARD_CB13]], label %[[CB1]], label %[[CB2]]
+;
+entry:
+ callbr void asm "", "!i,!i"() to label %cb1 [label %cb2, label %cb3]
+cb1:
+ callbr void asm "", "!i"() to label %cb2 [label %cb3]
+cb2:
+ callbr void asm "", "!i"() to label %cb3 [label %cb1]
+cb3:
+ callbr void asm "", "!i"() to label %cb1 [label %exit]
+exit:
+ ret void
+}
+
+; Irreducible loop: callbr as a "bypass" block
+define void @callbr_bypass(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @callbr_bypass(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[B_INV:%.*]] = xor i1 [[B]], true
+; CHECK-NEXT: [[A_INV:%.*]] = xor i1 [[A]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CB:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[L2:.*]] [label %cb.target.l1]
+; CHECK: [[L1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1:.*]]
+; CHECK: [[L2]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD1]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CB_TARGET_L1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_L1:%.*]] = phi i1 [ true, %[[CB_TARGET_L1]] ], [ [[A_INV]], %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[GUARD_L1]], label %[[L1]], label %[[IRR_GUARD1]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: [[GUARD_CB:%.*]] = phi i1 [ true, %[[L2]] ], [ true, %[[IRR_GUARD]] ], [ [[B_INV]], %[[L1]] ]
+; CHECK-NEXT: br i1 [[GUARD_CB]], label %[[CB]], label %[[L2]]
+;
+entry:
+ br i1 %a, label %cb, label %l1
+cb:
+ callbr void asm "", "!i"() to label %l2 [label %l1]
+l1:
+ br i1 %b, label %l2, label %cb
+l2:
+ br i1 %c, label %cb, label %exit
+exit:
+ ret void
+}
+
+; Irreducible loop: callbr with multiple indirect targets, some looping, some exiting
+define void @callbr_multiple_with_exit(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @callbr_multiple_with_exit(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i,!i,!i"()
+; CHECK-NEXT: to label %[[ENTRY_TARGET_L1:.*]] [label %[[ENTRY_TARGET_L2:.*]], label %[[EXIT:.*]], label %entry.target.l3]
+; CHECK: [[L1:.*]]:
+; CHECK-NEXT: br i1 [[A]], label %[[L2:.*]], label %[[IRR_GUARD:.*]]
+; CHECK: [[L2]]:
+; CHECK-NEXT: br i1 [[B]], label %[[IRR_GUARD2:.*]], label %[[EXIT]]
+; CHECK: [[L3:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD2]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[ENTRY_TARGET_L1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_L2]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_L3:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_L3:%.*]] = phi i1 [ true, %[[L1]] ], [ false, %[[ENTRY_TARGET_L1]] ], [ false, %[[ENTRY_TARGET_L2]] ], [ true, %[[ENTRY_TARGET_L3]] ]
+; CHECK-NEXT: [[GUARD_L1:%.*]] = phi i1 [ false, %[[L1]] ], [ true, %[[ENTRY_TARGET_L1]] ], [ false, %[[ENTRY_TARGET_L2]] ], [ false, %[[ENTRY_TARGET_L3]] ]
+; CHECK-NEXT: br i1 [[GUARD_L3]], label %[[L3]], label %[[IRR_GUARD1:.*]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_L13:%.*]] = phi i1 [ true, %[[L2]] ], [ [[GUARD_L1]], %[[IRR_GUARD1]] ], [ true, %[[L3]] ]
+; CHECK-NEXT: br i1 [[GUARD_L13]], label %[[L1]], label %[[L2]]
+;
+entry:
+ callbr void asm "", "!i,!i,!i"() to label %l1 [label %l2, label %exit, label %l3]
+l1:
+ br i1 %a, label %l2, label %l3
+l2:
+ br i1 %b, label %l1, label %exit
+l3:
+ br i1 %c, label %l1, label %exit
+exit:
+ ret void
+}
+
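+; Irreducible loops: callbr indirect targets feeding two irreducible cycles (h/b and bh/bb)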
+define void @callbr_nested(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_nested(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[ENTRY_TARGET_H:.*]] [label %entry.target.b]
+; CHECK: [[H:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1:.*]]
+; CHECK: [[B:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i,!i"()
+; CHECK-NEXT: to label %[[H]] [label %[[B_TARGET_BH:.*]], label %b.target.bb]
+; CHECK: [[BH:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[BB:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[BH]], label %[[RET:.*]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[B_TARGET_BH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[B_TARGET_BB:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_BB:%.*]] = phi i1 [ true, %[[BH]] ], [ false, %[[B_TARGET_BH]] ], [ true, %[[B_TARGET_BB]] ]
+; CHECK-NEXT: br i1 [[GUARD_BB]], label %[[BB]], label %[[BH]]
+; CHECK: [[ENTRY_TARGET_H]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1]]
+; CHECK: [[ENTRY_TARGET_B:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: [[GUARD_B:%.*]] = phi i1 [ true, %[[H]] ], [ false, %[[ENTRY_TARGET_H]] ], [ true, %[[ENTRY_TARGET_B]] ]
+; CHECK-NEXT: br i1 [[GUARD_B]], label %[[B]], label %[[H]]
+;
+entry:
+  callbr void asm "", "!i"() to label %h [label %b]

+h:
+ br label %b
+b:
+  callbr void asm "", "!i,!i"() to label %h [label %bh, label %bb]
+bh:
+ br label %bb
+bb:
+ br i1 %c, label %bh, label %ret
+ret:
+ ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LOOPS-AFTER: {{.*}}
+; LOOPS-BEFORE: {{.*}}
diff --git a/llvm/test/Transforms/FixIrreducible/nested.ll b/llvm/test/Transforms/FixIrreducible/nested.ll
index 0cc6b47..c9161cc1 100644
--- a/llvm/test/Transforms/FixIrreducible/nested.ll
+++ b/llvm/test/Transforms/FixIrreducible/nested.ll
@@ -50,6 +50,69 @@ exit:
ret void
}
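+; callbr variant of the preceding test: nested irreducible regions {A1, A2} and {B1, B2}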
+define void @nested_irr_top_level_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5) {
+; CHECK-LABEL: @nested_irr_top_level_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[ENTRY_TARGET_A1:%.*]] [label %entry.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[A1_TARGET_B1:%.*]] [label %A1.target.B2]
+; CHECK: B1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[B1_TARGET_B2:%.*]] [label %A3]
+; CHECK: B2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[B1:%.*]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[A3_TARGET_A2:%.*]] [label %exit]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[A1:%.*]] [label %exit]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A3.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: entry.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A3_TARGET_A2]] ], [ false, [[ENTRY_TARGET_A1]] ], [ true, [[ENTRY_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; CHECK: B1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1:%.*]]
+; CHECK: A1.target.B1:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: A1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_B2:%.*]] = phi i1 [ true, [[B1_TARGET_B2]] ], [ false, [[A1_TARGET_B1]] ], [ true, [[A1_TARGET_B2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_B2]], label [[B2:%.*]], label [[B1]]
+;
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %B1 [label %B2]
+
+B1:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %B2 [label %A3]
+
+B2:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %B1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %A2 [label %exit]
+
+A2:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %A1 [label %exit]
+
+exit:
+ ret void
+}
+
define void @nested_irr_in_loop(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6) {
; CHECK-LABEL: @nested_irr_in_loop(
; CHECK-NEXT: entry:
@@ -107,6 +170,80 @@ exit:
ret void
}
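+; callbr variant of the preceding test: the nested irreducible regions sit inside the reducible loop H1..L1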
+define void @nested_irr_in_loop_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6) {
+; CHECK-LABEL: @nested_irr_in_loop_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[H1:%.*]]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[H1_TARGET_A1:%.*]] [label %H1.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[A1_TARGET_B1:%.*]] [label %A1.target.B2]
+; CHECK: B1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[B1_TARGET_B2:%.*]] [label %A3]
+; CHECK: B2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[B1:%.*]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[A3_TARGET_A2:%.*]] [label %L1]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[A1:%.*]] [label %L1]
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED6:%.*]])
+; CHECK-NEXT: to label [[EXIT:%.*]] [label %H1]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A3.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: H1.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A3_TARGET_A2]] ], [ false, [[H1_TARGET_A1]] ], [ true, [[H1_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; CHECK: B1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1:%.*]]
+; CHECK: A1.target.B1:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: A1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_B2:%.*]] = phi i1 [ true, [[B1_TARGET_B2]] ], [ false, [[A1_TARGET_B1]] ], [ true, [[A1_TARGET_B2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_B2]], label [[B2:%.*]], label [[B1]]
+;
+entry:
+ br label %H1
+
+H1:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %B1 [label %B2]
+
+B1:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %B2 [label %A3]
+
+B2:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %B1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %A2 [label %L1]
+
+A2:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %A1 [label %L1]
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred6) to label %exit [label %H1]
+
+exit:
+ ret void
+}
+
define void @loop_in_irr(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
; CHECK-LABEL: @loop_in_irr(
; CHECK-NEXT: entry:
@@ -150,6 +287,60 @@ exit:
ret void
}
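+; callbr variant of the preceding test: a reducible loop H1/L1 nested inside the irreducible region {A1, A2}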
+define void @loop_in_irr_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
+; CHECK-LABEL: @loop_in_irr_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[ENTRY_TARGET_A1:%.*]] [label %entry.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H1:%.*]] []
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L1:%.*]] []
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[H1]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A3_TARGET_A2:%.*]] [label %exit]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A1:%.*]] []
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A3.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: entry.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A3_TARGET_A2]] ], [ false, [[ENTRY_TARGET_A1]] ], [ true, [[ENTRY_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+;
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", ""() to label %H1 []
+
+H1:
+ callbr void asm "", ""() to label %L1 []
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %H1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A2 [label %exit]
+
+A2:
+ callbr void asm "", ""() to label %A1 []
+
+exit:
+ ret void
+}
+
define void @loop_in_irr_shared_entry(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
; CHECK-LABEL: @loop_in_irr_shared_entry(
; CHECK-NEXT: entry:
@@ -188,6 +379,54 @@ exit:
ret void
}
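+; callbr variant of the preceding test: the loop header H1 is also an entry of the irreducible region {H1, A2}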
+define void @loop_in_irr_shared_entry_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
+; CHECK-LABEL: @loop_in_irr_shared_entry_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[ENTRY_TARGET_H1:%.*]] [label %entry.target.A2]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L1:%.*]] []
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[H1:%.*]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A3_TARGET_A2:%.*]] [label %exit]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H1]] []
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A3.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: entry.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A3_TARGET_A2]] ], [ false, [[ENTRY_TARGET_H1]] ], [ true, [[ENTRY_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[H1]]
+;
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %H1 [label %A2]
+
+H1:
+ callbr void asm "", ""() to label %L1 []
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %H1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A2 [label %exit]
+
+A2:
+ callbr void asm "", ""() to label %H1 []
+
+exit:
+ ret void
+}
+
define void @loop_in_irr_shared_header(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
; CHECK-LABEL: @loop_in_irr_shared_header(
; CHECK-NEXT: entry:
@@ -226,6 +465,56 @@ exit:
ret void
}
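+; callbr variant of the preceding test: the loop H1/L1 and the irreducible region {H1, A2} share the header H1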
+define void @loop_in_irr_shared_header_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
+; CHECK-LABEL: @loop_in_irr_shared_header_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[ENTRY_TARGET_A2:%.*]] [label %entry.target.H1]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L1:%.*]] []
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[L1_TARGET_H1:%.*]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A2:%.*]] [label %exit]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A2_TARGET_H1:%.*]] []
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A2.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: L1.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_H1:%.*]] = phi i1 [ true, [[A2_TARGET_H1]] ], [ true, [[L1_TARGET_H1]] ], [ false, [[ENTRY_TARGET_A2]] ], [ true, [[ENTRY_TARGET_H1:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_H1]], label [[H1:%.*]], label [[A2]]
+;
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A2 [label %H1]
+
+H1:
+ callbr void asm "", ""() to label %L1 []
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %H1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A2 [label %exit]
+
+A2:
+ callbr void asm "", ""() to label %H1 []
+
+exit:
+ ret void
+}
+
define void @loop_irr_loop_shared_header(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3) {
; CHECK-LABEL: @loop_irr_loop_shared_header(
; CHECK-NEXT: entry:
@@ -269,6 +558,62 @@ exit:
ret void
}
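+; callbr variant of the preceding test: outer loop H2..L2 around the irreducible region {H1, A2}, with H1 also heading a self-loop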
+define void @loop_irr_loop_shared_header_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3) {
+; CHECK-LABEL: @loop_irr_loop_shared_header_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H2:%.*]] []
+; CHECK: H2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[H2_TARGET_A2:%.*]] [label %H2.target.H1]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[A3:%.*]] [label %H1.target.H1]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A2:%.*]] [label %L2]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A2_TARGET_H1:%.*]] []
+; CHECK: L2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[H2]] [label %exit]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A2.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: H1.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H2.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H2.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_H1:%.*]] = phi i1 [ true, [[A2_TARGET_H1]] ], [ true, [[H1_TARGET_H1:%.*]] ], [ false, [[H2_TARGET_A2]] ], [ true, [[H2_TARGET_H1:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_H1]], label [[H1:%.*]], label [[A2]]
+;
+entry:
+ callbr void asm "", ""() to label %H2 []
+
+H2:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A2 [label %H1]
+
+H1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %A3 [label %H1]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A2 [label %L2]
+
+A2:
+ callbr void asm "", ""() to label %H1 []
+
+L2:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %H2 [label %exit]
+
+exit:
+ ret void
+}
+
define void @siblings_top_level(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6) {
; CHECK-LABEL: @siblings_top_level(
; CHECK-NEXT: entry:
@@ -336,6 +681,93 @@ exit:
ret void
}
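+; callbr variant of the preceding test: two sibling irreducible regions, {A1, A2} inside loop H1..L1 and {B1, B2} around loop H2/L2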
+define void @siblings_top_level_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6) {
+; CHECK-LABEL: @siblings_top_level_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[H1:%.*]] [label %fork1]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[H1_TARGET_A1:%.*]] [label %H1.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A1_TARGET_A2:%.*]] []
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A1:%.*]] [label %L1]
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[H1]] [label %exit]
+; CHECK: fork1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[FORK1_TARGET_B1:%.*]] [label %fork1.target.B2]
+; CHECK: B1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H2:%.*]] []
+; CHECK: H2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L2:%.*]] []
+; CHECK: L2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[H2]] [label %L2.target.B2]
+; CHECK: B2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED6:%.*]])
+; CHECK-NEXT: to label [[B1:%.*]] [label %exit]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: H1.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A1_TARGET_A2]] ], [ false, [[H1_TARGET_A1]] ], [ true, [[H1_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; CHECK: L2.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1:%.*]]
+; CHECK: fork1.target.B1:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: fork1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_B2:%.*]] = phi i1 [ true, [[L2_TARGET_B2:%.*]] ], [ false, [[FORK1_TARGET_B1]] ], [ true, [[FORK1_TARGET_B2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_B2]], label [[B2:%.*]], label [[B1]]
+;
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %H1 [label %fork1]
+
+H1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", ""() to label %A2 []
+
+A2:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A1 [label %L1]
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %H1 [label %exit]
+
+fork1:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %B1 [label %B2]
+
+B1:
+ callbr void asm "", ""() to label %H2 []
+
+H2:
+ callbr void asm "", ""() to label %L2 []
+
+L2:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %H2 [label %B2]
+
+B2:
+ callbr void asm "", "r,!i"(i1 %Pred6) to label %B1 [label %exit]
+
+exit:
+ ret void
+}
+
define void @siblings_in_loop(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6, i1 %Pred7) {
; CHECK-LABEL: @siblings_in_loop(
; CHECK-NEXT: entry:
@@ -413,6 +845,105 @@ exit:
ret void
}
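+; callbr variant of the preceding test: the same sibling regions nested inside the outer loop H0..L0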
+define void @siblings_in_loop_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6, i1 %Pred7) {
+; CHECK-LABEL: @siblings_in_loop_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H0:%.*]] []
+; CHECK: H0:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[H1:%.*]] [label %fork1]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[H1_TARGET_A1:%.*]] [label %H1.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A1_TARGET_A2:%.*]] []
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A1:%.*]] [label %L1]
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[H1]] [label %L0]
+; CHECK: fork1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[FORK1_TARGET_B1:%.*]] [label %fork1.target.B2]
+; CHECK: B1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H2:%.*]] []
+; CHECK: H2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L2:%.*]] []
+; CHECK: L2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[H2]] [label %L2.target.B2]
+; CHECK: B2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED6:%.*]])
+; CHECK-NEXT: to label [[B1:%.*]] [label %L0]
+; CHECK: L0:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED7:%.*]])
+; CHECK-NEXT: to label [[EXIT:%.*]] [label %H0]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: H1.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A1_TARGET_A2]] ], [ false, [[H1_TARGET_A1]] ], [ true, [[H1_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; CHECK: L2.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1:%.*]]
+; CHECK: fork1.target.B1:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: fork1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_B2:%.*]] = phi i1 [ true, [[L2_TARGET_B2:%.*]] ], [ false, [[FORK1_TARGET_B1]] ], [ true, [[FORK1_TARGET_B2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_B2]], label [[B2:%.*]], label [[B1]]
+;
+entry:
+ callbr void asm "", ""() to label %H0 []
+
+H0:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %H1 [label %fork1]
+
+H1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", ""() to label %A2 []
+
+A2:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A1 [label %L1]
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %H1 [label %L0]
+
+fork1:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %B1 [label %B2]
+
+B1:
+ callbr void asm "", ""() to label %H2 []
+
+H2:
+ callbr void asm "", ""() to label %L2 []
+
+L2:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %H2 [label %B2]
+
+B2:
+ callbr void asm "", "r,!i"(i1 %Pred6) to label %B1 [label %L0]
+
+L0:
+ callbr void asm "", "r,!i"(i1 %Pred7) to label %exit [label %H0]
+
+exit:
+ ret void
+}
+
define void @irr_in_irr_shared_entry(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6, i1 %Pred7, i1 %Pred8, i1 %Pred9, i1 %Pred10, i1 %Pred11, i1 %Pred12, i1 %Pred13) {
; CHECK-LABEL: @irr_in_irr_shared_entry(
; CHECK-NEXT: entry:
@@ -527,3 +1058,148 @@ if.end8.i:
exit:
ret void
}
+
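+; callbr variant of @irr_in_irr_shared_entry: an irreducible region nested inside another irreducible region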
+define void @irr_in_irr_shared_entry_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6, i1 %Pred7, i1 %Pred8, i1 %Pred9, i1 %Pred10, i1 %Pred11, i1 %Pred12, i1 %Pred13) {
+; CHECK-LABEL: @irr_in_irr_shared_entry_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[IF_END:%.*]] [label %if.then]
+; CHECK: if.end:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[IF_THEN7:%.*]] [label %if.else]
+; CHECK: if.then7:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[IF_END16:%.*]] []
+; CHECK: if.else:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[IF_END16]] []
+; CHECK: if.end16:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[WHILE_COND_PREHEADER:%.*]] [label %if.then39]
+; CHECK: while.cond.preheader:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[WHILE_COND:%.*]] []
+; CHECK: while.cond:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[WHILE_COND_TARGET_COND_TRUE49:%.*]] [label %lor.rhs]
+; CHECK: cond.true49:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[IF_THEN69:%.*]] [label %cond.true49.target.while.body63]
+; CHECK: while.body63:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[EXIT:%.*]] [label %while.cond47]
+; CHECK: while.cond47:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED6:%.*]])
+; CHECK-NEXT: to label [[COND_TRUE49:%.*]] [label %while.cond47.target.cond.end61]
+; CHECK: cond.end61:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED7:%.*]])
+; CHECK-NEXT: to label [[COND_END61_TARGET_WHILE_BODY63:%.*]] [label %while.cond]
+; CHECK: if.then69:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED8:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %while.cond]
+; CHECK: lor.rhs:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED9:%.*]])
+; CHECK-NEXT: to label [[LOR_RHS_TARGET_COND_END61:%.*]] [label %while.end76]
+; CHECK: while.end76:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[EXIT]] []
+; CHECK: if.then39:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED10:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %if.end.i145]
+; CHECK: if.end.i145:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED11:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %if.end8.i149]
+; CHECK: if.end8.i149:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[EXIT]] []
+; CHECK: if.then:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED12:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %if.end.i]
+; CHECK: if.end.i:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED13:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %if.end8.i]
+; CHECK: if.end8.i:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[EXIT]] []
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: while.cond47.target.cond.end61:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: lor.rhs.target.cond.end61:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: while.cond.target.cond.true49:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_COND_END61:%.*]] = phi i1 [ true, [[WHILE_COND47_TARGET_COND_END61:%.*]] ], [ true, [[LOR_RHS_TARGET_COND_END61]] ], [ false, [[WHILE_COND_TARGET_COND_TRUE49]] ]
+; CHECK-NEXT: br i1 [[GUARD_COND_END61]], label [[COND_END61:%.*]], label [[IRR_GUARD1:%.*]]
+; CHECK: cond.true49.target.while.body63:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: cond.end61.target.while.body63:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_WHILE_BODY63:%.*]] = phi i1 [ true, [[COND_TRUE49_TARGET_WHILE_BODY63:%.*]] ], [ true, [[COND_END61_TARGET_WHILE_BODY63]] ], [ false, [[IRR_GUARD]] ]
+; CHECK-NEXT: br i1 [[GUARD_WHILE_BODY63]], label [[WHILE_BODY63:%.*]], label [[COND_TRUE49]]
+;
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %if.end [label %if.then]
+
+if.end:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %if.then7 [label %if.else]
+
+if.then7:
+ callbr void asm "", ""() to label %if.end16 []
+
+if.else:
+ callbr void asm "", ""() to label %if.end16 []
+
+if.end16:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %while.cond.preheader [label %if.then39]
+
+while.cond.preheader:
+ callbr void asm "", ""() to label %while.cond []
+
+while.cond:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %cond.true49 [label %lor.rhs]
+
+cond.true49:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %if.then69 [label %while.body63]
+
+while.body63:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %exit [label %while.cond47]
+
+while.cond47:
+ callbr void asm "", "r,!i"(i1 %Pred6) to label %cond.true49 [label %cond.end61]
+
+cond.end61:
+ callbr void asm "", "r,!i"(i1 %Pred7) to label %while.body63 [label %while.cond]
+
+if.then69:
+ callbr void asm "", "r,!i"(i1 %Pred8) to label %exit [label %while.cond]
+
+lor.rhs:
+ callbr void asm "", "r,!i"(i1 %Pred9) to label %cond.end61 [label %while.end76]
+
+while.end76:
+ callbr void asm "", ""() to label %exit []
+
+if.then39:
+ callbr void asm "", "r,!i"(i1 %Pred10) to label %exit [label %if.end.i145]
+
+if.end.i145:
+ callbr void asm "", "r,!i"(i1 %Pred11) to label %exit [label %if.end8.i149]
+
+if.end8.i149:
+ callbr void asm "", ""() to label %exit []
+
+if.then:
+ callbr void asm "", "r,!i"(i1 %Pred12) to label %exit [label %if.end.i]
+
+if.end.i:
+ callbr void asm "", "r,!i"(i1 %Pred13) to label %exit [label %if.end8.i]
+
+if.end8.i:
+ callbr void asm "", ""() to label %exit []
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/FixIrreducible/unreachable.ll b/llvm/test/Transforms/FixIrreducible/unreachable.ll
index defbefb..845cf50 100644
--- a/llvm/test/Transforms/FixIrreducible/unreachable.ll
+++ b/llvm/test/Transforms/FixIrreducible/unreachable.ll
@@ -25,3 +25,26 @@ loop.latch:
loop.exit:
ret void
}
+
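+; An extra edge from an unreachable block must not make the loop look irreducible (no irr.guard expected).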
+; CHECK-LABEL: @unreachable_callbr(
+; CHECK: entry:
+; CHECK-NOT: irr.guard:
+define void @unreachable_callbr(i32 %n, i1 %arg) {
+entry:
+ callbr void asm "", ""() to label %loop.body []
+
+loop.body:
+ callbr void asm "", ""() to label %inner.block []
+
+unreachable.block:
+ callbr void asm "", ""() to label %inner.block []
+
+inner.block:
+ callbr void asm "", "r,!i"(i1 %arg) to label %loop.exit [label %loop.latch]
+
+loop.latch:
+ callbr void asm "", ""() to label %loop.body []
+
+loop.exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll
index bbbc5c5..a389801 100644
--- a/llvm/test/Transforms/GVN/assume-equal.ll
+++ b/llvm/test/Transforms/GVN/assume-equal.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=gvn -S | FileCheck %s
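+; addrspace(1) pointers are 64 bits wide with a 32-bit index (address) width.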
+target datalayout = "p1:64:64:64:32"
+
%struct.A = type { ptr }
@_ZTV1A = available_externally unnamed_addr constant [4 x ptr] [ptr null, ptr @_ZTI1A, ptr @_ZN1A3fooEv, ptr @_ZN1A3barEv], align 8
@_ZTI1A = external constant ptr
@@ -372,6 +374,20 @@ define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp(ptr %p, ptr %p2) {
ret i1 %c
}
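+; As above, but with an addrspace(1) pointer whose 32-bit index width is narrower than its 64-bit pointer size.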
+define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P]], [[P2]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr addrspace(1) [[P]], null
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %cmp = icmp eq ptr addrspace(1) %p, %p2
+ call void @llvm.assume(i1 %cmp)
+ %c = icmp eq ptr addrspace(1) %p2, null
+ ret i1 %c
+}
+
; This is not correct, as it may change the provenance exposed by ptrtoint.
; We still allow it for now.
define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p2) {
@@ -388,6 +404,20 @@ define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p
ret i64 %int
}
+define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P]], [[P2]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64
+; CHECK-NEXT: ret i64 [[INT]]
+;
+ %cmp = icmp eq ptr addrspace(1) %p, %p2
+ call void @llvm.assume(i1 %cmp)
+ %int = ptrtoint ptr addrspace(1) %p2 to i64
+ ret i64 %int
+}
+
define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(ptr %p, ptr %p2) {
; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(
; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) {
@@ -402,6 +432,20 @@ define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(ptr %p, ptr %
ret i64 %int
}
+define i32 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define i32 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P]], [[P2]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[INT:%.*]] = ptrtoaddr ptr addrspace(1) [[P]] to i32
+; CHECK-NEXT: ret i32 [[INT]]
+;
+ %cmp = icmp eq ptr addrspace(1) %p, %p2
+ call void @llvm.assume(i1 %cmp)
+ %int = ptrtoaddr ptr addrspace(1) %p2 to i32
+ ret i32 %int
+}
+
define i8 @assume_ptr_eq_same_prov(ptr %p, i64 %x) {
; CHECK-LABEL: define i8 @assume_ptr_eq_same_prov(
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
diff --git a/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll b/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
index 08dcf1d..8e932e0 100644
--- a/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
+++ b/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
@@ -7,11 +7,11 @@ define void @f(ptr addrspace(7) %arg) {
; CHECK-LABEL: define void @f
; CHECK-SAME: (ptr addrspace(7) [[ARG:%.*]]) {
; CHECK-NEXT: bb:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[ARG]], i32 8
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB1]]
; CHECK: bb2:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[ARG]], i32 8
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb3:
; CHECK-NEXT: [[I4:%.*]] = load i32, ptr addrspace(7) [[SCEVGEP]], align 4
diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
index 2003b1a..3c6535d 100644
--- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
@@ -4,33 +4,31 @@
define i32 @remove_loop(i32 %size) #0 {
; CHECK-V8M-LABEL: @remove_loop(
-; CHECK-V8M-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-V8M-NEXT: entry:
-; CHECK-V8M-NEXT: br label %[[WHILE_COND:.*]]
-; CHECK-V8M: while.cond:
-; CHECK-V8M-NEXT: br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]]
-; CHECK-V8M: while.end:
-; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31
+; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
; CHECK-V8M-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
; CHECK-V8M-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
; CHECK-V8M-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
; CHECK-V8M-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
+; CHECK-V8M-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-V8M: while.cond:
+; CHECK-V8M-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
+; CHECK-V8M: while.end:
; CHECK-V8M-NEXT: ret i32 [[TMP4]]
;
; CHECK-V8A-LABEL: @remove_loop(
-; CHECK-V8A-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-V8A-NEXT: entry:
-; CHECK-V8A-NEXT: br label %[[WHILE_COND:.*]]
-; CHECK-V8A: while.cond:
-; CHECK-V8A-NEXT: br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]]
-; CHECK-V8A: while.end:
-; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31
+; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
; CHECK-V8A-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
; CHECK-V8A-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
; CHECK-V8A-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
; CHECK-V8A-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
+; CHECK-V8A-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-V8A: while.cond:
+; CHECK-V8A-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
+; CHECK-V8A: while.end:
; CHECK-V8A-NEXT: ret i32 [[TMP4]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
index 2261423..382f026 100644
--- a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
@@ -77,6 +77,8 @@ define dso_local arm_aapcscc void @test(ptr nocapture %pDest, ptr nocapture read
; CHECK-NEXT: [[CMP2780:%.*]] = icmp ugt i32 [[ADD25]], [[J_0_LCSSA]]
; CHECK-NEXT: br i1 [[CMP2780]], label [[FOR_BODY29_PREHEADER:%.*]], label [[FOR_END40]]
; CHECK: for.body29.preheader:
+; CHECK-NEXT: [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
+; CHECK-NEXT: [[SCEVGEP93:%.*]] = getelementptr i16, ptr [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT: br label [[FOR_BODY29:%.*]]
; CHECK: for.body29:
; CHECK-NEXT: [[J_184:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY29]] ], [ [[J_0_LCSSA]], [[FOR_BODY29_PREHEADER]] ]
@@ -100,8 +102,6 @@ define dso_local arm_aapcscc void @test(ptr nocapture %pDest, ptr nocapture read
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ADD25]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29]]
; CHECK: for.end40.loopexit:
-; CHECK-NEXT: [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
-; CHECK-NEXT: [[SCEVGEP93:%.*]] = getelementptr i16, ptr [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, ptr [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT: [[SCEVGEP94:%.*]] = getelementptr i32, ptr [[PDEST_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT: br label [[FOR_END40]]
diff --git a/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll b/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
index 0fa6e34..0eb9deb 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
@@ -14,6 +14,7 @@ define void @test(i64 %a) {
; CHECK: outer_header:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 21, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 20, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[OUTER_LATCH]] ]
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: br label [[INNER_HEADER:%.*]]
; CHECK: inner_header:
; CHECK-NEXT: [[J:%.*]] = phi i64 [ 1, [[OUTER_HEADER]] ], [ [[J_NEXT:%.*]], [[INNER_HEADER]] ]
@@ -22,7 +23,6 @@ define void @test(i64 %a) {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[J_NEXT]], [[INDVARS_IV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER_HEADER]], label [[OUTER_LATCH]]
; CHECK: outer_latch:
-; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND2:%.*]] = icmp ne i64 [[I_NEXT]], 40
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br i1 [[COND2]], label [[OUTER_HEADER]], label [[RETURN:%.*]]
diff --git a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
index 1592b84..829092f 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=indvars -S | FileCheck %s
+; RUN: opt < %s -passes='require<scalar-evolution>,indvars,loop-mssa(licm)' -S | FileCheck %s
define i32 @logical_and_2ops(i32 %n, i32 %m) {
; CHECK-LABEL: @logical_and_2ops(
@@ -56,10 +56,10 @@ define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) {
; CHECK: loop:
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[K:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT: [[N:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[N]])
+; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N1:%.*]])
; CHECK-NEXT: ret i32 [[UMIN1]]
;
entry:
@@ -84,10 +84,10 @@ define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) {
; CHECK: loop:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[K:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT: [[N:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[N]])
+; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N1:%.*]])
; CHECK-NEXT: ret i32 [[UMIN1]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
index e006d9f..f798eb28 100644
--- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
+++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
@@ -932,6 +932,9 @@ for.end: ; preds = %for.body, %entry
define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
; CHECK-LABEL: @ult_multiuse_profit(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[START:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT: [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP1]], i16 254)
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
@@ -940,9 +943,6 @@ define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[START:%.*]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i16
-; CHECK-NEXT: [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP2]], i16 254)
; CHECK-NEXT: ret i16 [[UMAX]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
index 093e25a..e9e0d22 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr116483.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
@@ -4,16 +4,16 @@
define i32 @test() {
; CHECK-LABEL: define i32 @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
-; CHECK: [[LOOP_BODY]]:
-; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
-; CHECK: [[EXIT]]:
; CHECK-NEXT: [[XOR:%.*]] = xor i32 0, 3
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[XOR]], 329
; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[MUL]] to i16
; CHECK-NEXT: [[SEXT:%.*]] = shl i16 [[CONV]], 8
; CHECK-NEXT: [[CONV1:%.*]] = ashr i16 [[SEXT]], 8
; CHECK-NEXT: [[CONV3:%.*]] = zext i16 [[CONV1]] to i32
+; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
+; CHECK: [[LOOP_BODY]]:
+; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[CONV3]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/pr24783.ll b/llvm/test/Transforms/IndVarSimplify/pr24783.ll
index c521bca..37ecf42 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr24783.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr24783.ll
@@ -7,11 +7,11 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @f(ptr %end.s, ptr %loc, i32 %p) {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds i32, ptr [[END_S:%.*]], i32 [[P:%.*]]
; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]]
; CHECK: while.body.i:
; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[WHILE_BODY_I]]
; CHECK: loop.exit:
-; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds i32, ptr [[END_S:%.*]], i32 [[P:%.*]]
; CHECK-NEXT: store ptr [[END]], ptr [[LOC:%.*]], align 8
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/IndVarSimplify/pr39673.ll b/llvm/test/Transforms/IndVarSimplify/pr39673.ll
index 7b093b3..3cee1ab 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr39673.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr39673.ll
@@ -148,6 +148,7 @@ loop2.end: ; preds = %loop2
define i16 @neg_loop_carried(i16 %arg) {
; CHECK-LABEL: @neg_loop_carried(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[ARG:%.*]], 2
; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop1:
; CHECK-NEXT: [[L1:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[L1_ADD:%.*]], [[LOOP1]] ]
@@ -155,7 +156,6 @@ define i16 @neg_loop_carried(i16 %arg) {
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i16 [[L1_ADD]], 2
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP1]], label [[LOOP2_PREHEADER:%.*]]
; CHECK: loop2.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[ARG:%.*]], 2
; CHECK-NEXT: br label [[LOOP2:%.*]]
; CHECK: loop2:
; CHECK-NEXT: [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[TMP0]], [[LOOP2_PREHEADER]] ]
diff --git a/llvm/test/Transforms/IndVarSimplify/pr63763.ll b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
index 427db1e..a5fde67 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr63763.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
@@ -16,13 +16,13 @@ define i32 @test(i1 %c) {
; CHECK-NEXT: [[CONV2:%.*]] = ashr exact i32 [[SEXT]], 24
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 7, [[CONV2]]
; CHECK-NEXT: call void @use(i32 [[INVARIANT_OP]])
+; CHECK-NEXT: [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
+; CHECK-NEXT: [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
+; CHECK-NEXT: [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
-; CHECK-NEXT: [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
-; CHECK-NEXT: [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]]
; CHECK-NEXT: ret i32 [[INVARIANT_OP_US]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
index b3162de..7cdc98a 100644
--- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
+++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
@@ -4,22 +4,21 @@
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
define i32 @remove_loop(i32 %size) {
-; CHECK-LABEL: define i32 @remove_loop(
-; CHECK-SAME: i32 [[SIZE:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[WHILE_COND:.*]]
-; CHECK: [[WHILE_COND]]:
-; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], %[[ENTRY]] ], [ [[SUB:%.*]], %[[WHILE_COND]] ]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
-; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
-; CHECK-NEXT: br i1 [[CMP]], label %[[WHILE_COND]], label %[[WHILE_END:.*]]
-; CHECK: [[WHILE_END]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31
+; CHECK-LABEL: @remove_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
+; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
+; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]]
+; CHECK: while.end:
; CHECK-NEXT: ret i32 [[TMP4]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
index 84ae79d..41fce36 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
@@ -76,6 +76,10 @@ define i64 @narow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
; CHECK-LABEL: @narow_canonical_iv_wide_multiplied_iv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -84,10 +88,6 @@ define i64 @narow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: ret i64 [[TMP6]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll b/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
index 14e06fe..aca553e 100644
--- a/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
+++ b/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
@@ -23,8 +23,8 @@ define void @test1(i8 %x, ptr %ptr) {
; CHECK-NEXT: br label [[WHILE_COND192:%.*]]
; CHECK: while.cond192:
; CHECK-NEXT: switch i8 [[X:%.*]], label [[WHILE_BODY205:%.*]] [
-; CHECK-NEXT: i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
-; CHECK-NEXT: i8 10, label [[IF_END224_LOOPEXIT1:%.*]]
+; CHECK-NEXT: i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
+; CHECK-NEXT: i8 10, label [[IF_END224_LOOPEXIT1:%.*]]
; CHECK-NEXT: ]
; CHECK: while.cond215.preheader:
; CHECK-NEXT: br label [[WHILE_COND215:%.*]]
@@ -103,8 +103,8 @@ define void @test2(i16 %x) {
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: switch i16 [[X:%.*]], label [[RETURN_LOOPEXIT1:%.*]] [
-; CHECK-NEXT: i16 41, label [[FOR_END:%.*]]
-; CHECK-NEXT: i16 43, label [[FOR_COND]]
+; CHECK-NEXT: i16 41, label [[FOR_END:%.*]]
+; CHECK-NEXT: i16 43, label [[FOR_COND]]
; CHECK-NEXT: ]
; CHECK: for.end:
; CHECK-NEXT: [[I_0_LCSSA2:%.*]] = phi i32 [ 0, [[FOR_COND]] ]
@@ -336,6 +336,7 @@ if.end1824: ; preds = %for.end1326
define void @test5(ptr %header, i32 %conv, i8 %n) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[CONV:%.*]], 2
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br label [[FOR_INNER:%.*]]
@@ -358,7 +359,6 @@ define void @test5(ptr %header, i32 %conv, i8 %n) {
; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[WHILE_COND_PREHEADER:%.*]]
; CHECK: while.cond.preheader:
; CHECK-NEXT: [[ADD85_LCSSA:%.*]] = phi i32 [ [[ADD85]], [[FOR_INC]] ]
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[CONV:%.*]], 2
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[POS_8:%.*]] = phi i32 [ [[INC114:%.*]], [[WHILE_BODY:%.*]] ], [ [[ADD85_LCSSA]], [[WHILE_COND_PREHEADER]] ]
@@ -427,8 +427,8 @@ define void @test6(i8 %x) {
; CHECK-NEXT: br label [[WHILE_COND192:%.*]]
; CHECK: while.cond192:
; CHECK-NEXT: switch i8 [[X:%.*]], label [[WHILE_BODY205:%.*]] [
-; CHECK-NEXT: i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
-; CHECK-NEXT: i8 10, label [[IF_END224:%.*]]
+; CHECK-NEXT: i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
+; CHECK-NEXT: i8 10, label [[IF_END224:%.*]]
; CHECK-NEXT: ]
; CHECK: while.cond215.preheader:
; CHECK-NEXT: [[I_7_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_COND192]] ]
diff --git a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
index a92d328..ad69812 100644
--- a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
+++ b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
@@ -46,12 +46,12 @@ for.end106: ; preds = %for.cond
define i32 @test_pr58439(i32 %a) {
; CHECK-LABEL: @test_pr58439(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A:%.*]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[C_EXT_LCSSA:%.*]] = phi i32 [ 0, [[LOOP]] ]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[A:%.*]], 1
; CHECK-NEXT: [[RES:%.*]] = add i32 [[C_EXT_LCSSA]], [[OR]]
; CHECK-NEXT: ret i32 [[RES]]
;
@@ -76,6 +76,7 @@ define i8 @l(i32 %inc, i1 %tobool.not.i) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
+; CHECK-NEXT: [[AND:%.*]] = and i32 1, [[INC:%.*]]
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[C_05_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[INNER]] ], [ 0, [[OUTER_HEADER]] ]
@@ -86,7 +87,6 @@ define i8 @l(i32 %inc, i1 %tobool.not.i) {
; CHECK: outer.latch:
; CHECK-NEXT: [[C_05_I_LCSSA:%.*]] = phi i32 [ [[C_05_I]], [[INNER]] ]
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[INNER]] ]
-; CHECK-NEXT: [[AND:%.*]] = and i32 1, [[INC:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[AND]] to i8
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C_05_I_LCSSA]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[TMP0]], [[TMP1]]
diff --git a/llvm/test/Transforms/IndVarSimplify/sentinel.ll b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
index 5234141..4f12308 100644
--- a/llvm/test/Transforms/IndVarSimplify/sentinel.ll
+++ b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
@@ -9,19 +9,19 @@ define void @test(i1 %arg) personality ptr @snork {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB4:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add i32 [[INDVARS_IV:%.*]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TMP6:%.*]], [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMAX:%.*]]
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB2:%.*]], label [[BB4]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP1]], [[BB1:%.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP1:%.*]], [[BB1:%.*]] ]
; CHECK-NEXT: ret void
; CHECK: bb4:
-; CHECK-NEXT: [[INDVARS_IV]] = phi i32 [ [[INDVARS_IV_NEXT]], [[BB1]] ], [ undef, [[BB:%.*]] ]
-; CHECK-NEXT: [[SMAX]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36)
-; CHECK-NEXT: [[TMP6]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[BB1]] ], [ undef, [[BB:%.*]] ]
+; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36)
+; CHECK-NEXT: [[TMP6:%.*]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
; CHECK-NEXT: to label [[BB7:%.*]] unwind label [[BB15:%.*]]
; CHECK: bb7:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TMP6]], [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1]] = sub i32 [[TMP0]], [[SMAX]]
; CHECK-NEXT: br label [[BB9:%.*]]
; CHECK: bb9:
; CHECK-NEXT: br i1 true, label [[BB1]], label [[BB9]]
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll b/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
deleted file mode 100644
index 89583f9..0000000
--- a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=indvars -indvars-predicate-loops=0 -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin10.0"
-
-; We make sinking here, Changed flag should be set properly.
-define i32 @test(i32 %a, i32 %b, i32 %N) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: ret i32 [[ADD]]
-;
-entry:
- %add = add i32 %a, %b
- br label %loop
-
-loop:
- %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
- %iv.next = add i32 %iv, 1
- %cmp = icmp slt i32 %iv.next, %N
- br i1 %cmp, label %loop, label %exit
-
-exit:
- ret i32 %add
-}
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll b/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll
deleted file mode 100644
index d2478be..0000000
--- a/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -passes=indvars -S | FileCheck %s
-
-declare i1 @b()
-
-define i32 @a(i32 %x) nounwind {
-for.body.preheader:
- %y = sdiv i32 10, %x
- br label %for.body
-
-for.body:
- %cmp = call i1 @b()
- br i1 %cmp, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:
- ret i32 %y
-}
-; CHECK: for.end.loopexit:
-; CHECK: sdiv
-; CHECK: ret
diff --git a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
index 17921af..abe7a3e 100644
--- a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
+++ b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -24,13 +24,13 @@ define void @_Z3fn1v() {
; CHECK-NEXT: [[X8:%.*]] = icmp ult i32 0, 4
; CHECK-NEXT: br i1 [[X8]], label [[DOTPREHEADER_LR_PH:%.*]], label [[X22]]
; CHECK: .preheader.lr.ph:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[K_09]], i64 [[TMP5]]
; CHECK-NEXT: br label [[DOTPREHEADER:%.*]]
; CHECK: .preheader:
; CHECK-NEXT: br label [[X17:%.*]]
; CHECK: x17:
; CHECK-NEXT: br i1 false, label [[DOTPREHEADER]], label [[DOT_CRIT_EDGE_8:%.*]]
; CHECK: ._crit_edge.8:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[K_09]], i64 [[TMP5]]
; CHECK-NEXT: br label [[X22]]
; CHECK: x22:
; CHECK-NEXT: [[K_1_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[DOT_CRIT_EDGE_8]] ], [ [[K_09]], [[DOTPREHEADER4]] ]
diff --git a/llvm/test/Transforms/InstCombine/vec_extract_var_elt-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt-inseltpoison.ll
deleted file mode 100644
index 9fcac80..0000000
--- a/llvm/test/Transforms/InstCombine/vec_extract_var_elt-inseltpoison.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-
-define void @test (float %b, ptr %p) {
-; CHECK: extractelement
-; CHECK: fptosi
- %1 = load <8 x float> , ptr %p
- %2 = bitcast <8 x float> %1 to <8 x i32>
- %3 = bitcast <8 x i32> %2 to <8 x float>
- %a = fptosi <8 x float> %3 to <8 x i32>
- %4 = fptosi float %b to i32
- %5 = add i32 %4, -2
- %6 = extractelement <8 x i32> %a, i32 %5
- %7 = insertelement <8 x i32> poison, i32 %6, i32 7
- %8 = sitofp <8 x i32> %7 to <8 x float>
- store <8 x float> %8, ptr %p
- ret void
-}
-
-; PR18600
-define i32 @test2(i32 %i) {
- %e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i
- ret i32 %e
-
-; CHECK-LABEL: @test2
-; CHECK: extractelement
-}
diff --git a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll
index 32bf4da..205b4b8 100644
--- a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll
+++ b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll
@@ -1,26 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-define void @test (float %b, ptr %p) {
-; CHECK: extractelement
-; CHECK: fptosi
- %1 = load <8 x float> , ptr %p
+define void @test_poison(float %b, ptr %p) {
+; CHECK-LABEL: define void @test_poison(
+; CHECK-SAME: float [[B:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 32
+; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[B]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -2
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = fptosi float [[TMP4]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 7
+; CHECK-NEXT: [[TMP7:%.*]] = sitofp <8 x i32> [[TMP6]] to <8 x float>
+; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[P]], align 32
+; CHECK-NEXT: ret void
+;
+ %1 = load <8 x float>, ptr %p
%2 = bitcast <8 x float> %1 to <8 x i32>
%3 = bitcast <8 x i32> %2 to <8 x float>
%a = fptosi <8 x float> %3 to <8 x i32>
%4 = fptosi float %b to i32
%5 = add i32 %4, -2
%6 = extractelement <8 x i32> %a, i32 %5
- %7 = insertelement <8 x i32> undef, i32 %6, i32 7
+ %7 = insertelement <8 x i32> poison, i32 %6, i32 7
%8 = sitofp <8 x i32> %7 to <8 x float>
store <8 x float> %8, ptr %p
- ret void
+ ret void
}
; PR18600
-define i32 @test2(i32 %i) {
+define i32 @test_bitcast(i32 %i) {
+; CHECK-LABEL: define i32 @test_bitcast(
+; CHECK-SAME: i32 [[I:%.*]]) {
+; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i32> <i32 1, i32 0, i32 2, i32 0>, i32 [[I]]
+; CHECK-NEXT: ret i32 [[E]]
+;
%e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i
ret i32 %e
+}
+
+declare void @use(i32)
-; CHECK-LABEL: @test2
-; CHECK: extractelement
+define void @test_loop(<4 x float> %in) {
+; CHECK-LABEL: define void @test_loop(
+; CHECK-SAME: <4 x float> [[IN:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[R:%.*]] = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> [[IN]], i32 9)
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT: [[COND:%.*]] = icmp samesign ult i32 [[I]], 4
+; CHECK-NEXT: br i1 [[COND]], label %[[BODY:.*]], label %[[DONE:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[R]], i32 [[I]]
+; CHECK-NEXT: [[ELEM:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT: call void @use(i32 [[ELEM]])
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LATCH]]:
+; CHECK-NEXT: [[NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: br label %[[LOOP]]
+; CHECK: [[DONE]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %r = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %in, i32 9)
+ %vi = fptosi <4 x float> %r to <4 x i32>
+ br label %loop
+loop:
+ %i = phi i32 [ 0, %entry ], [ %next, %latch ]
+ %cond = icmp ult i32 %i, 4
+ br i1 %cond, label %body, label %done
+body:
+ %elem = extractelement <4 x i32> %vi, i32 %i
+ call void @use(i32 %elem)
+ br label %latch
+latch:
+ %next = add i32 %i, 1
+ br label %loop
+done:
+ ret void
}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll
index 77a7f0d..479b3f8 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll
@@ -12,8 +12,7 @@ define i32 @add_0() {
define i32 @add_0_scalable_vector() {
; CHECK-LABEL: @add_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -89,8 +88,7 @@ define i32 @add_poison() {
define i32 @add_poison_scalable_vector() {
; CHECK-LABEL: @add_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
@@ -123,8 +121,7 @@ define i32 @mul_0() {
define i32 @mul_0_scalable_vector() {
; CHECK-LABEL: @mul_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -140,13 +137,29 @@ define i32 @mul_1() {
define i32 @mul_1_scalable_vector() {
; CHECK-LABEL: @mul_1_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 1))
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 1
;
%x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 1))
ret i32 %x
}
+define i32 @mul_2() {
+; CHECK-LABEL: @mul_2(
+; CHECK-NEXT: ret i32 256
+;
+ %x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
+ ret i32 %x
+}
+
+define i32 @mul_2_scalable_vector() {
+; CHECK-LABEL: @mul_2_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 2))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 2))
+ ret i32 %x
+}
+
define i32 @mul_inc() {
; CHECK-LABEL: @mul_inc(
; CHECK-NEXT: ret i32 40320
@@ -200,8 +213,7 @@ define i32 @mul_poison() {
define i32 @mul_poison_scalable_vector() {
; CHECK-LABEL: @mul_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
@@ -225,8 +237,7 @@ define i32 @and_0() {
define i32 @and_0_scalable_vector() {
; CHECK-LABEL: @and_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -242,8 +253,7 @@ define i32 @and_1() {
define i32 @and_1_scalable_vector() {
; CHECK-LABEL: @and_1_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> splat (i32 1))
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 1
;
%x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> splat (i32 1))
ret i32 %x
@@ -302,8 +312,7 @@ define i32 @and_poison() {
define i32 @and_poison_scalable_vector() {
; CHECK-LABEL: @and_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
@@ -327,8 +336,7 @@ define i32 @or_0() {
define i32 @or_0_scalable_vector() {
; CHECK-LABEL: @or_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -344,8 +352,7 @@ define i32 @or_1() {
define i32 @or_1_scalable_vector() {
; CHECK-LABEL: @or_1_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> splat (i32 1))
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 1
;
%x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> splat (i32 1))
ret i32 %x
@@ -404,8 +411,7 @@ define i32 @or_poison() {
define i32 @or_poison_scalable_vector() {
; CHECK-LABEL: @or_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
@@ -429,8 +435,7 @@ define i32 @xor_0() {
define i32 @xor_0_scalable_vector() {
; CHECK-LABEL: @xor_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -446,13 +451,21 @@ define i32 @xor_1() {
define i32 @xor_1_scalable_vector() {
; CHECK-LABEL: @xor_1_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> splat (i32 1))
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> splat(i32 1))
ret i32 %x
}
+define i32 @xor_1_scalable_vector_lane_count_not_known_even() {
+; CHECK-LABEL: @xor_1_scalable_vector_lane_count_not_known_even(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+  %x = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> splat(i32 1))
+ ret i32 %x
+}
+
define i32 @xor_inc() {
; CHECK-LABEL: @xor_inc(
; CHECK-NEXT: ret i32 10
@@ -506,8 +519,7 @@ define i32 @xor_poison() {
define i32 @xor_poison_scalable_vector() {
; CHECK-LABEL: @xor_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
@@ -531,8 +543,7 @@ define i32 @smin_0() {
define i32 @smin_0_scalable_vector() {
; CHECK-LABEL: @smin_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -548,8 +559,7 @@ define i32 @smin_1() {
define i32 @smin_1_scalable_vector() {
; CHECK-LABEL: @smin_1_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> splat (i32 1))
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 1
;
%x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> splat(i32 1))
ret i32 %x
@@ -608,8 +618,7 @@ define i32 @smin_poison() {
define i32 @smin_poison_scalable_vector() {
; CHECK-LABEL: @smin_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
@@ -633,8 +642,7 @@ define i32 @smax_0() {
define i32 @smax_0_scalable_vector() {
; CHECK-LABEL: @smax_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -650,8 +658,7 @@ define i32 @smax_1() {
define i32 @smax_1_scalable_vector() {
; CHECK-LABEL: @smax_1_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> splat (i32 1))
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 1
;
%x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> splat(i32 1))
ret i32 %x
@@ -710,8 +717,7 @@ define i32 @smax_poison() {
define i32 @smax_poison_scalable_vector() {
; CHECK-LABEL: @smax_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
@@ -735,8 +741,7 @@ define i32 @umin_0() {
define i32 @umin_0_scalable_vector() {
; CHECK-LABEL: @umin_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -752,8 +757,7 @@ define i32 @umin_1() {
define i32 @umin_1_scalable_vector() {
; CHECK-LABEL: @umin_1_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> splat (i32 1))
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 1
;
%x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> splat (i32 1))
ret i32 %x
@@ -812,8 +816,7 @@ define i32 @umin_poison() {
define i32 @umin_poison_scalable_vector() {
; CHECK-LABEL: @umin_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
@@ -837,8 +840,7 @@ define i32 @umax_0() {
define i32 @umax_0_scalable_vector() {
; CHECK-LABEL: @umax_0_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 0
;
%x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> zeroinitializer)
ret i32 %x
@@ -854,8 +856,7 @@ define i32 @umax_1() {
define i32 @umax_1_scalable_vector() {
; CHECK-LABEL: @umax_1_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> splat (i32 1))
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 1
;
%x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> splat(i32 1))
ret i32 %x
@@ -914,8 +915,7 @@ define i32 @umax_poison() {
define i32 @umax_poison_scalable_vector() {
; CHECK-LABEL: @umax_poison_scalable_vector(
-; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> poison)
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> poison)
ret i32 %x
diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll
index 3af65df..e6cc457 100644
--- a/llvm/test/Transforms/LICM/scalar-promote.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote.ll
@@ -43,9 +43,9 @@ define void @test2(i32 %i) {
; CHECK-LABEL: define void @test2(
; CHECK-SAME: i32 [[I:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[X1:%.*]] = getelementptr i32, ptr @X, i64 1
; CHECK-NEXT: [[X2:%.*]] = getelementptr i32, ptr @X, i64 1
-; CHECK-NEXT: [[X1_PROMOTED:%.*]] = load i32, ptr [[X1]], align 4
+; CHECK-NEXT: [[X3:%.*]] = getelementptr i32, ptr @X, i64 1
+; CHECK-NEXT: [[X1_PROMOTED:%.*]] = load i32, ptr [[X2]], align 4
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[A1:%.*]] = phi i32 [ [[V:%.*]], %[[LOOP]] ], [ [[X1_PROMOTED]], %[[ENTRY]] ]
@@ -53,7 +53,7 @@ define void @test2(i32 %i) {
; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i32 [ [[V]], %[[LOOP]] ]
-; CHECK-NEXT: store i32 [[V_LCSSA]], ptr [[X1]], align 4
+; CHECK-NEXT: store i32 [[V_LCSSA]], ptr [[X2]], align 4
; CHECK-NEXT: ret void
;
Entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-alloca.ll b/llvm/test/Transforms/LICM/sink-alloca.ll
index 0997bf6..2bf9350 100644
--- a/llvm/test/Transforms/IndVarSimplify/sink-alloca.ll
+++ b/llvm/test/Transforms/LICM/sink-alloca.ll
@@ -1,9 +1,9 @@
-; RUN: opt < %s -passes=indvars -S | FileCheck %s
+; RUN: opt < %s -passes=licm -verify-memoryssa -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin10.0"
; PR4775
-; Indvars shouldn't sink the alloca out of the entry block, even though
+; LICM shouldn't sink the alloca out of the entry block, even though
; it's not used until after the loop.
define i32 @main() nounwind {
; CHECK: entry:
@@ -25,7 +25,7 @@ while.end: ; preds = %while.cond
declare i32 @bar()
; <rdar://problem/10352360>
-; Indvars shouldn't sink the first alloca between the stacksave and stackrestore
+; LICM shouldn't sink the first alloca between the stacksave and stackrestore
; intrinsics.
declare ptr @a(...)
declare ptr @llvm.stacksave() nounwind
diff --git a/llvm/test/Transforms/LICM/sink-from-preheader.ll b/llvm/test/Transforms/LICM/sink-from-preheader.ll
new file mode 100644
index 0000000..bbe3d3b
--- /dev/null
+++ b/llvm/test/Transforms/LICM/sink-from-preheader.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=licm -verify-memoryssa -S | FileCheck %s
+
+; We perform sinking here; the Changed flag should be set properly.
+define i32 @test(i32 %a, i32 %b, i32 %N) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %add = add i32 %a, %b
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+define i32 @test_with_unused_load(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %load = load i32, ptr %b
+ %add = add i32 %a, %load
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+define i32 @test_with_unused_load_modified_store(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load_modified_store(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[SMAX:%.*]] = phi i32 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: store i32 [[SMAX]], ptr [[B]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A]], [[LOAD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %load = load i32, ptr %b
+ %add = add i32 %a, %load
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, %a
+ store i32 %iv.next, ptr %b
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+; Volatile loads must not be sunk.
+define i32 @test_with_volatile_load_no_sink(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_volatile_load_no_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load volatile i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %ld = load volatile i32, ptr %b, align 4
+ %add = add i32 %a, %ld
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+; Ordered/atomic loads must not be sunk.
+define i32 @test_with_atomic_load_no_sink(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_atomic_load_no_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load atomic i32, ptr [[B:%.*]] acquire, align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %ld = load atomic i32, ptr %b acquire, align 4
+ %add = add i32 %a, %ld
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+declare void @clobber(ptr)
+
+; Calls that may write memory in the loop should prevent sinking the load.
+define i32 @test_with_unused_load_clobbered_by_call(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load_clobbered_by_call(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: call void @clobber(ptr [[B]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %ld = load i32, ptr %b, align 4
+ %add = add i32 %a, %ld
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ call void @clobber(ptr %b)
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
diff --git a/llvm/test/Transforms/LICM/sink-trapping.ll b/llvm/test/Transforms/LICM/sink-trapping.ll
new file mode 100644
index 0000000..f4d260d
--- /dev/null
+++ b/llvm/test/Transforms/LICM/sink-trapping.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=licm -verify-memoryssa -S | FileCheck %s
+
+declare i1 @b()
+
+define i32 @a(i32 %x) nounwind {
+; CHECK-LABEL: define i32 @a(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[FOR_BODY_PREHEADER:.*:]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[CMP:%.*]] = call i1 @b()
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 10, [[X]]
+; CHECK-NEXT: ret i32 [[Y]]
+;
+for.body.preheader:
+ %y = sdiv i32 10, %x
+ br label %for.body
+
+for.body:
+ %cmp = call i1 @b()
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+ ret i32 %y
+}
diff --git a/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll b/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
index bdd51c2..6c19aaa 100644
--- a/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
+++ b/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
@@ -84,13 +84,13 @@ define i32 @scev_invalidation_after_deleting(ptr %src) {
; CHECK: inner.2.preheader:
; CHECK-NEXT: br label [[INNER_3_PH:%.*]]
; CHECK: inner.3.ph:
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 0 to i32
; CHECK-NEXT: br label [[INNER_3:%.*]]
; CHECK: inner.3:
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: br i1 false, label [[OUTER_LATCH]], label [[INNER_3]]
; CHECK: outer.latch:
; CHECK-NEXT: [[L_LCSSA:%.*]] = phi i32 [ [[L]], [[INNER_3]] ]
-; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 0 to i32
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i32 [[L_LCSSA]], [[TRUNC]]
; CHECK-NEXT: br label [[OUTER_HEADER]]
;
diff --git a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
index 62c5627..4a55c0e 100644
--- a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
+++ b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
@@ -4,11 +4,11 @@
define void @test_pr50940(ptr %A, ptr %B) {
; CHECK-LABEL: @test_pr50940(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: br i1 false, label [[OUTER_LATCH:%.*]], label [[INNER_PH:%.*]]
; CHECK: inner.ph:
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 3
; CHECK-NEXT: br label [[INNER_LVER_CHECK:%.*]]
; CHECK: inner.lver.check:
diff --git a/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll b/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll
index b2ec53c..90995a0 100644
--- a/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll
+++ b/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll
@@ -537,6 +537,52 @@ exit: ; preds = %loop
%ret = and i32 %unrelated.next, %crc.next
ret i32 %ret
}
+
+define i16 @not.crc.data.next.outside.user(i16 %crc.init, i16 %data.init) {
+; CHECK-LABEL: define i16 @not.crc.data.next.outside.user(
+; CHECK-SAME: i16 [[CRC_INIT:%.*]], i16 [[DATA_INIT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[TBL_LD:%.*]] = phi i16 [ [[CRC_INIT]], %[[ENTRY]] ], [ [[CRC_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC_BE_SHIFT:%.*]] = phi i16 [ [[DATA_INIT]], %[[ENTRY]] ], [ [[DATA_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC_NEXT3:%.*]] = xor i16 [[CRC_BE_SHIFT]], [[TBL_LD]]
+; CHECK-NEXT: [[CRC_SHL:%.*]] = shl i16 [[TBL_LD]], 1
+; CHECK-NEXT: [[CRC_XOR:%.*]] = xor i16 [[CRC_SHL]], 3
+; CHECK-NEXT: [[CHECK_SB:%.*]] = icmp slt i16 [[CRC_NEXT3]], 0
+; CHECK-NEXT: [[CRC_NEXT]] = select i1 [[CHECK_SB]], i16 [[CRC_XOR]], i16 [[CRC_SHL]]
+; CHECK-NEXT: [[DATA_NEXT]] = shl i16 [[CRC_BE_SHIFT]], 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp samesign ult i32 [[IV]], 7
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i16 [ [[CRC_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[DATA_NEXT_LCSSA:%.*]] = phi i16 [ [[DATA_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[RET:%.*]] = xor i16 [[DATA_NEXT_LCSSA]], [[CRC_NEXT_LCSSA]]
+; CHECK-NEXT: ret i16 [[RET]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %data = phi i16 [ %data.init, %entry ], [ %data.next, %loop ]
+ %xor.crc.data = xor i16 %data, %crc
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 3
+ %check.sb = icmp slt i16 %xor.crc.data, 0
+ %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+ %data.next = shl i16 %data, 1
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit:
+ %ret = xor i16 %data.next, %crc.next
+ ret i16 %ret
+}
;.
; CHECK: attributes #[[ATTR0]] = { optsize }
;.
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
index db30fd2..1944a9c 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
@@ -119,8 +119,6 @@ for.end:
; We can't use postindex addressing on the conditional load of qval and can't
; convert the loop condition to a compare with zero, so we should instead use
; offset addressing.
-; FIXME: Currently we don't notice the load of qval is conditional, and attempt
-; postindex addressing anyway.
define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-LABEL: define i32 @conditional_load(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[N:%.*]]) {
@@ -128,7 +126,6 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[RET_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
@@ -136,6 +133,8 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]]
; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[IDX]], 2
+; CHECK-NEXT: [[LSR_IV:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP0]]
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[RET]], [[QVAL]]
; CHECK-NEXT: br label %[[FOR_INC]]
@@ -143,7 +142,6 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: [[RET_NEXT]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[RET]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
; CHECK-NEXT: [[NVAL:%.*]] = load volatile i64, ptr [[N]], align 8
-; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IDX_NEXT]], [[NVAL]]
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
@@ -176,3 +174,141 @@ for.inc:
exit:
ret i32 %ret.next
}
+
+; We can use postindex addressing for both loads here, even though the second
+; may not be executed on every loop iteration.
+define i32 @early_exit_load(ptr %p, ptr %q, ptr %n) {
+; CHECK-LABEL: define i32 @early_exit_load(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
+; CHECK-NEXT: [[RET_PHI:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[PVAL]], 0
+; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_INC]], label %[[EXIT:.*]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[QVAL]], [[RET_PHI]]
+; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
+; CHECK-NEXT: [[NVAL:%.*]] = load volatile i64, ptr [[N]], align 8
+; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i64 [[IDX_NEXT]], [[NVAL]]
+; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[RET_PHI]], %[[FOR_BODY]] ], [ [[ADD]], %[[FOR_INC]] ]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %ret.phi = phi i32 [ %add, %for.inc ], [ 0, %entry ]
+ %idx = phi i64 [ %idx.next, %for.inc ], [ 0, %entry ]
+ %paddr = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ %pval = load i32, ptr %paddr, align 4
+ %cmp1 = icmp eq i32 %pval, 0
+ br i1 %cmp1, label %for.inc, label %exit
+
+for.inc:
+ %qaddr = getelementptr inbounds nuw i32, ptr %q, i64 %idx
+ %qval = load i32, ptr %qaddr, align 4
+ %add = add nsw i32 %qval, %ret.phi
+ %idx.next = add nuw nsw i64 %idx, 1
+ %nval = load volatile i64, ptr %n, align 8
+ %cmp2 = icmp slt i64 %idx.next, %nval
+ br i1 %cmp2, label %for.body, label %exit
+
+exit:
+ %ret = phi i32 [ %ret.phi, %for.body ], [ %add, %for.inc ]
+ ret i32 %ret
+}
+
+; The control-flow before and after the load of qval shouldn't prevent postindex
+; addressing from happening.
+; FIXME: We choose postindex addressing, but the scevgep is placed in for.inc so
+; during codegen we will fail to actually generate a postindex load.
+define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
+; CHECK-LABEL: define void @middle_block_load(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_INC]] ], [ [[N]], %[[ENTRY]] ]
+; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV2]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[PVAL]], 0
+; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN1:.*]], label %[[IF_ELSE1:.*]]
+; CHECK: [[IF_THEN1]]:
+; CHECK-NEXT: tail call void @otherfn1()
+; CHECK-NEXT: br label %[[IF_END:.*]]
+; CHECK: [[IF_ELSE1]]:
+; CHECK-NEXT: tail call void @otherfn2()
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[QVAL]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_THEN2:.*]], label %[[IF_ELSE2:.*]]
+; CHECK: [[IF_THEN2]]:
+; CHECK-NEXT: tail call void @otherfn1()
+; CHECK-NEXT: br label %[[FOR_INC]]
+; CHECK: [[IF_ELSE2]]:
+; CHECK-NEXT: tail call void @otherfn2()
+; CHECK-NEXT: br label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
+; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %idx = phi i64 [ %idx.next, %for.inc ], [ 0, %entry ]
+ %paddr = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ %pval = load i32, ptr %paddr, align 4
+ %cmp1 = icmp sgt i32 %pval, 0
+ br i1 %cmp1, label %if.then1, label %if.else1
+
+if.then1:
+ tail call void @otherfn1()
+ br label %if.end
+
+if.else1:
+ tail call void @otherfn2()
+ br label %if.end
+
+if.end:
+ %qaddr = getelementptr inbounds nuw i32, ptr %q, i64 %idx
+ %qval = load i32, ptr %qaddr, align 4
+ %cmp2 = icmp sgt i32 %qval, 0
+ br i1 %cmp2, label %if.then2, label %if.else2
+
+if.then2:
+ tail call void @otherfn1()
+ br label %for.inc
+
+if.else2:
+ tail call void @otherfn2()
+ br label %for.inc
+
+for.inc:
+ %idx.next = add nuw nsw i64 %idx, 1
+ %cmp3 = icmp eq i64 %idx.next, %n
+ br i1 %cmp3, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+declare dso_local void @otherfn1()
+declare dso_local void @otherfn2()
diff --git a/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll
index 1339afe..1339afe 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll
new file mode 100644
index 0000000..96b31d8
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll
@@ -0,0 +1,160 @@
+; Test branch weight metadata, estimated trip count metadata, and block
+; frequencies after loop unrolling with an epilogue.
+
+; ------------------------------------------------------------------------------
+; Define substitutions.
+;
+; Check original loop body frequency.
+; DEFINE: %{bf-fc} = opt %s -S -passes='print<block-freq>' 2>&1 | \
+; DEFINE: FileCheck %s -check-prefixes
+;
+; Unroll loops and then check block frequency. The -implicit-check-not options
+; make sure that no additional labels or @f calls show up.
+; DEFINE: %{ur-bf} = opt %s -S -passes='loop-unroll,print<block-freq>' 2>&1
+; DEFINE: %{fc} = FileCheck %s \
+; DEFINE: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \
+; DEFINE: -implicit-check-not='call void @f' -check-prefixes
+
+; ------------------------------------------------------------------------------
+; Check various interesting unroll count values relative to the original loop's
+; estimated trip count of 11 (e.g., minimum and boundary values).
+;
+; RUN: %{bf-fc} ALL,ORIG
+; RUN: %{ur-bf} -unroll-count=2 -unroll-runtime | %{fc} ALL,UR,UR2
+; RUN: %{ur-bf} -unroll-count=4 -unroll-runtime | %{fc} ALL,UR,UR4
+; RUN: %{ur-bf} -unroll-count=10 -unroll-runtime | %{fc} ALL,UR,UR10
+; RUN: %{ur-bf} -unroll-count=11 -unroll-runtime | %{fc} ALL,UR,UR11
+; RUN: %{ur-bf} -unroll-count=12 -unroll-runtime | %{fc} ALL,UR,UR12
+
+; ------------------------------------------------------------------------------
+; Check the iteration frequencies: multiplying each frequency by the number
+; of original loop bodies that execute within that iteration and summing the
+; results should give almost exactly the original loop body frequency.
+;
+; ALL-LABEL: block-frequency-info: test
+;
+; ORIG: - [[ENTRY:.*]]:
+; ORIG: - [[DO_BODY:.*]]: float = 11.0,
+; ORIG: - [[DO_END:.*]]:
+;
+; UR: - [[ENTRY:.*]]:
+; UR: - [[ENTRY_NEW:.*]]:
+; UR2: - [[DO_BODY:.*]]: float = 5.2381,
+; UR4: - [[DO_BODY:.*]]: float = 2.3702,
+; UR10: - [[DO_BODY:.*]]: float = 0.6902,
+; UR11: - [[DO_BODY:.*]]: float = 0.59359,
+; UR12: - [[DO_BODY:.*]]: float = 0.5144,
+; UR: - [[DO_END_UNR_LCSSA:.*]]:
+; UR: - [[DO_BODY_EPIL_PREHEADER:.*]]:
+; UR2: - [[DO_BODY_EPIL:.*]]: float = 0.52381,
+; UR4: - [[DO_BODY_EPIL:.*]]: float = 1.5193,
+; UR10: - [[DO_BODY_EPIL:.*]]: float = 4.098,
+; UR11: - [[DO_BODY_EPIL:.*]]: float = 4.4705,
+; UR12: - [[DO_BODY_EPIL:.*]]: float = 4.8272,
+; UR4: - [[DO_END_EPILOG_LCSSA:.*]]:
+; UR10: - [[DO_END_EPILOG_LCSSA:.*]]:
+; UR11: - [[DO_END_EPILOG_LCSSA:.*]]:
+; UR12: - [[DO_END_EPILOG_LCSSA:.*]]:
+; UR: - [[DO_END:.*]]:
+
+; ------------------------------------------------------------------------------
+; Check the CFGs, including the number of original loop bodies that appear
+; within each unrolled iteration.
+;
+; UR-LABEL: define void @test(i32 %{{.*}}) {
+; UR: [[ENTRY]]:
+; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[ENTRY_NEW]], !prof ![[#PROF_UR_GUARD:]]{{$}}
+; UR: [[ENTRY_NEW]]:
+; UR: br label %[[DO_BODY]]
+; UR: [[DO_BODY]]:
+; UR2-COUNT-2: call void @f
+; UR4-COUNT-4: call void @f
+; UR10-COUNT-10: call void @f
+; UR11-COUNT-11: call void @f
+; UR12-COUNT-12: call void @f
+; UR: br i1 %{{.*}}, label %[[DO_END_UNR_LCSSA]], label %[[DO_BODY]], !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]{{$}}
+; UR: [[DO_END_UNR_LCSSA]]:
+; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[DO_END:.*]], !prof ![[#PROF_RM_GUARD:]]{{$}}
+; UR: [[DO_BODY_EPIL_PREHEADER]]:
+; UR: br label %[[DO_BODY_EPIL]]
+; UR: [[DO_BODY_EPIL]]:
+; UR: call void @f
+; UR4: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}}
+; UR10: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}}
+; UR11: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}}
+; UR12: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}}
+; UR4: [[DO_END_EPILOG_LCSSA]]:
+; UR10: [[DO_END_EPILOG_LCSSA]]:
+; UR11: [[DO_END_EPILOG_LCSSA]]:
+; UR12: [[DO_END_EPILOG_LCSSA]]:
+; UR: br label %[[DO_END]]
+; UR: [[DO_END]]:
+; UR: ret void
+
+declare void @f(i32)
+
+define void @test(i32 %n) {
+entry:
+ br label %do.body
+
+do.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+ %inc = add i32 %i, 1
+ call void @f(i32 %i)
+ %c = icmp sge i32 %inc, %n
+ br i1 %c, label %do.end, label %do.body, !prof !0
+
+do.end:
+ ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 10}
+
+; ------------------------------------------------------------------------------
+; Check branch weight metadata and estimated trip count metadata.
+;
+; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 195225786, i32 1952257862}
+; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 534047398, i32 1613436250}
+; UR10: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1236740947, i32 910742701}
+; UR11: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1319535738, i32 827947910}
+; UR12: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1394803730, i32 752679918}
+;
+; UR2: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 372703773, i32 1774779875}
+; UR4: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 680723421, i32 1466760227}
+; UR10: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1319535738, i32 827947910}
+; UR11: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1394803730, i32 752679918}
+; UR12: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1463229177, i32 684254471}
+;
+; UR2: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR4: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR10: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR11: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR12: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+;
+; UR2: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 5}
+; UR4: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2}
+; UR10: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1}
+; UR11: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1}
+; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0}
+; UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}
+;
+; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1022611260, i32 1124872388}
+; UR4: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1531603292, i32 615880356}
+; UR10: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1829762672, i32 317720976}
+; UR11: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1846907894, i32 300575754}
+; UR12: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1860963812, i32 286519836}
+;
+; UR4: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1038564635, i32 1108919013}
+; UR10: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1656332913, i32 491150735}
+; UR11: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1693034047, i32 454449601}
+; UR12: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1723419551, i32 424064097}
+
+; UR4: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+; UR10: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+; UR12: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+;
+; UR4: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3}
+; For UR10, llvm.loop.estimated_trip_count is the same for both loops.
+; UR11: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0}
+; UR12: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 11}
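The frequency expectations in the unroll-epilog test above can be cross-checked by hand: each unrolled body frequency, multiplied by the unroll count, plus the epilogue body frequency should recover the original do.body frequency of 11.0 implied by branch_weights 1, 10. A small illustrative Python check (not part of the test) using the values quoted in the UR* lines:

# Cross-check: body_freq * unroll_count + epilogue_freq ~= 11.0 for every case.
cases = {
    2:  (5.2381, 0.52381),
    4:  (2.3702, 1.5193),
    10: (0.6902, 4.098),
    11: (0.59359, 4.4705),
    12: (0.5144, 4.8272),
}
for count, (body, epil) in cases.items():
    print(count, round(body * count + epil, 3))  # each line prints ~11.0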
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll
new file mode 100644
index 0000000..cde9d46
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll
@@ -0,0 +1,68 @@
+; Test branch weight metadata, estimated trip count metadata, and block
+; frequencies after partial loop unrolling without -unroll-runtime.
+
+; RUN: opt < %s -S -passes='print<block-freq>' 2>&1 | \
+; RUN: FileCheck -check-prefix=CHECK %s
+
+; The -implicit-check-not options make sure that no additional labels or calls
+; to @f show up.
+; RUN: opt < %s -S -passes='loop-unroll,print<block-freq>' \
+; RUN: -unroll-count=4 2>&1 | \
+; RUN: FileCheck %s -check-prefix=CHECK-UR \
+; RUN: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \
+; RUN: -implicit-check-not='call void @f'
+
+; CHECK: block-frequency-info: test
+; CHECK: do.body: float = 10.0,
+
+; The sum should still be ~10.
+;
+; CHECK-UR: block-frequency-info: test
+; CHECK-UR: - [[ENTRY:.*]]:
+; CHECK-UR: - [[DO_BODY:.*]]: float = 2.9078,
+; CHECK-UR: - [[DO_BODY_1:.*]]: float = 2.617,
+; CHECK-UR: - [[DO_BODY_2:.*]]: float = 2.3553,
+; CHECK-UR: - [[DO_BODY_3:.*]]: float = 2.1198,
+; CHECK-UR: - [[DO_END:.*]]:
+
+declare void @f(i32)
+
+define void @test(i32 %n) {
+; CHECK-UR-LABEL: define void @test(i32 %{{.*}}) {
+; CHECK-UR: [[ENTRY]]:
+; CHECK-UR: br label %[[DO_BODY]]
+; CHECK-UR: [[DO_BODY]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_1]], !prof ![[#PROF:]]
+; CHECK-UR: [[DO_BODY_1]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_2]], !prof ![[#PROF]]
+; CHECK-UR: [[DO_BODY_2]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_3]], !prof ![[#PROF]]
+; CHECK-UR: [[DO_BODY_3]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY]], !prof ![[#PROF]], !llvm.loop ![[#LOOP_UR_LATCH:]]
+; CHECK-UR: [[DO_END]]:
+; CHECK-UR: ret void
+
+entry:
+ br label %do.body
+
+do.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+ %inc = add i32 %i, 1
+ call void @f(i32 %i)
+ %c = icmp sge i32 %inc, %n
+ br i1 %c, label %do.end, label %do.body, !prof !0
+
+do.end:
+ ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 9}
+
+; CHECK-UR: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9}
+; CHECK-UR: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; CHECK-UR: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3}
+; CHECK-UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}
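The CHECK-UR frequencies in the unroll-partial test above appear to follow the geometric model implied by branch_weights 1, 9 (continue probability 0.9): after unrolling by 4, the header frequency is 1/(1 - 0.9^4) and each later body scales by 0.9, so the four frequencies still sum to the original 10.0. A short illustrative Python sketch of that arithmetic (not part of the test):

# Reproduce the per-body frequencies after a 4x partial unroll.
p = 0.9                                    # latch continue probability from !prof
unroll = 4
f1 = 1.0 / (1.0 - p**unroll)               # do.body   ~2.9078
freqs = [f1 * p**i for i in range(unroll)]
print([round(f, 4) for f in freqs])        # [2.9078, 2.617, 2.3553, 2.1198]
print(round(sum(freqs), 3))                # ~10.0, the original do.body frequency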
diff --git a/llvm/test/Transforms/LoopUnroll/followup.ll b/llvm/test/Transforms/LoopUnroll/followup.ll
index 051e43d..9dda76e 100644
--- a/llvm/test/Transforms/LoopUnroll/followup.ll
+++ b/llvm/test/Transforms/LoopUnroll/followup.ll
@@ -1,9 +1,20 @@
-; RUN: opt < %s -S -passes=loop-unroll -unroll-count=2 | FileCheck %s -check-prefixes=COUNT,COMMON
-; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
-; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
-;
-; Check that followup-attributes are applied after LoopUnroll.
+; Check that followup attributes are applied after LoopUnroll.
;
+; We choose -unroll-count=3 because it produces partial unrolling of remainder
+; loops. Complete unrolling would leave no remainder loop to which to copy
+; followup attributes.
+
+; DEFINE: %{unroll} = opt < %s -S -passes=loop-unroll -unroll-count=3
+; DEFINE: %{epilog} = %{unroll} -unroll-runtime -unroll-runtime-epilog=true
+; DEFINE: %{prolog} = %{unroll} -unroll-runtime -unroll-runtime-epilog=false
+; DEFINE: %{fc} = FileCheck %s -check-prefixes
+
+; RUN: %{unroll} | %{fc} COMMON,COUNT
+; RUN: %{epilog} | %{fc} COMMON,EPILOG,EPILOG-NO-UNROLL
+; RUN: %{prolog} | %{fc} COMMON,PROLOG,PROLOG-NO-UNROLL
+; RUN: %{epilog} -unroll-remainder | %{fc} COMMON,EPILOG,EPILOG-UNROLL
+; RUN: %{prolog} -unroll-remainder | %{fc} COMMON,PROLOG,PROLOG-UNROLL
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define i32 @test(ptr nocapture %a, i32 %n) nounwind uwtable readonly {
@@ -36,15 +47,17 @@ for.end: ; preds = %for.body, %entry
; COMMON-LABEL: @test(
-; COUNT: br i1 %exitcond.1, label %for.end.loopexit, label %for.body, !llvm.loop ![[LOOP:[0-9]+]]
+; COUNT: br i1 %exitcond.2, label %for.end.loopexit, label %for.body, !llvm.loop ![[LOOP:[0-9]+]]
; COUNT: ![[FOLLOWUP_ALL:[0-9]+]] = !{!"FollowupAll"}
; COUNT: ![[FOLLOWUP_UNROLLED:[0-9]+]] = !{!"FollowupUnrolled"}
; COUNT: ![[LOOP]] = distinct !{![[LOOP]], ![[FOLLOWUP_ALL]], ![[FOLLOWUP_UNROLLED]]}
-; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit.unr-lcssa, label %for.body, !llvm.loop ![[LOOP_0:[0-9]+]]
-; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop ![[LOOP_2:[0-9]+]]
+; EPILOG: br i1 %niter.ncmp.2, label %for.end.loopexit.unr-lcssa, label %for.body, !llvm.loop ![[LOOP_0:[0-9]+]]
+; EPILOG-NO-UNROLL: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop ![[LOOP_2:[0-9]+]]
+; EPILOG-UNROLL: br i1 %epil.iter.cmp, label %for.body.epil.1, label %for.end.loopexit.epilog-lcssa
+; EPILOG-UNROLL: br i1 %epil.iter.cmp.1, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop ![[LOOP_2:[0-9]+]]
; EPILOG: ![[LOOP_0]] = distinct !{![[LOOP_0]], ![[FOLLOWUP_ALL:[0-9]+]], ![[FOLLOWUP_UNROLLED:[0-9]+]]}
; EPILOG: ![[FOLLOWUP_ALL]] = !{!"FollowupAll"}
@@ -53,8 +66,10 @@ for.end: ; preds = %for.body, %entry
; EPILOG: ![[FOLLOWUP_REMAINDER]] = !{!"FollowupRemainder"}
-; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop ![[LOOP_0:[0-9]+]]
-; PROLOG: br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body, !llvm.loop ![[LOOP_2:[0-9]+]]
+; PROLOG-UNROLL: br i1 %prol.iter.cmp, label %for.body.prol.1, label %for.body.prol.loopexit.unr-lcssa
+; PROLOG-UNROLL: br i1 %prol.iter.cmp.1, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop ![[LOOP_0:[0-9]+]]
+; PROLOG-NO-UNROLL: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop ![[LOOP_0:[0-9]+]]
+; PROLOG: br i1 %exitcond.2, label %for.end.loopexit.unr-lcssa, label %for.body, !llvm.loop ![[LOOP_2:[0-9]+]]
; PROLOG: ![[LOOP_0]] = distinct !{![[LOOP_0]], ![[FOLLOWUP_ALL:[0-9]+]], ![[FOLLOWUP_REMAINDER:[0-9]+]]}
; PROLOG: ![[FOLLOWUP_ALL]] = !{!"FollowupAll"}
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
index 0c52b5a0..0473601 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
@@ -188,7 +188,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
; CHECK-NEXT: [[L_1_LCSSA_UNR:%.*]] = phi i32 [ poison, [[OUTER_HEADER]] ], [ [[L_1_LCSSA_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[INNER_1_IV_UNR:%.*]] = phi i64 [ [[X]], [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 7
-; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF3]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF6:![0-9]+]]
; CHECK: outer.header.new:
; CHECK-NEXT: br label [[INNER_1_HEADER:%.*]]
; CHECK: inner.1.header:
@@ -232,7 +232,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
; CHECK-NEXT: store i32 [[L_1_7]], ptr [[DST]], align 8
; CHECK-NEXT: [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV]], 8
; CHECK-NEXT: [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0
-; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: outer.middle.unr-lcssa:
; CHECK-NEXT: [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ]
; CHECK-NEXT: br label [[OUTER_MIDDLE]]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
index 2617199..2f8f98d 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
@@ -2,12 +2,24 @@
;; Check that the remainder loop is properly assigned a branch weight for its latch branch.
; CHECK-LABEL: @test(
-; CHECK-LABEL: for.body:
-; CHECK: br i1 [[COND1:%.*]], label %for.end.loopexit.unr-lcssa, label %for.body, !prof ![[#PROF:]], !llvm.loop ![[#LOOP:]]
-; CHECK-LABEL: for.body.epil:
-; CHECK: br i1 [[COND2:%.*]], label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]]
-; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 2499}
-; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK-LABEL: entry:
+; CHECK: [[FOR_BODY_PREHEADER:.*]]:
+; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]], !prof ![[#PROF_UR_GUARD:]]
+; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
+; CHECK: br label %for.body
+; CHECK: for.body:
+; CHECK: %add = add
+; CHECK: %add.1 = add
+; CHECK: %add.2 = add
+; CHECK: %add.3 = add
+; CHECK-NOT: %add.4 = add
+; CHECK: br i1 %{{.*}}, label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %for.body, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]
+; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
+; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]], !prof ![[#PROF_RM_GUARD:]]
+; CHECK: [[FOR_BODY_EPIL_PREHEADER]]:
+; CHECK: br label %[[FOR_BODY_EPIL:.*]]
+; CHECK: [[FOR_BODY_EPIL]]:
+; CHECK: br i1 {{.*}}, label %[[FOR_BODY_EPIL]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]
define i3 @test(ptr %a, i3 %n) {
entry:
@@ -31,3 +43,37 @@ for.end:
}
!0 = !{!"branch_weights", i32 1, i32 9999}
+
+; Original loop probability: p = 9999/(1+9999) = 0.9999
+; Original estimated trip count: (1+9999)/1 = 10000
+; Unroll count: 4
+
+; Probability of >=3 iterations after first: p^3 = 0.99970003 =~
+; 2146839468 / (644180 + 2146839468).
+; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 644180, i32 2146839468}
+
+; Probability of >=4 more iterations: p^4 = 0.99960006 =~
+; 2146624784 / (858864 + 2146624784).
+; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 858864, i32 2146624784}
+
+; 10000//4 = 2500
+; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2500}
+;
+; CHECK: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}
+
+; Probability of 1 to 3 remainder iterations (out of the possible 0 to 3):
+; (p-p^4)/(1-p^4) = 0.749962497 =~ 1610532724 / (1610532724 + 536950924).
+; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1610532724, i32 536950924}
+
+; Frequency of first remainder iter: r1 = 1
+; Frequency of second remainder iter: r2 = r1*(p-p^3)/(1-p^3) = 0.666633331
+; Frequency of third remainder iter: r3 = r2*(p-p^2)/(1-p^2) = 0.333299999
+; Solve for loop probability that produces that frequency: f = 1/(1-p') =>
+; p' = 1-1/f = 1-1/(r1+r2+r3) = 0.499983332 =~
+; 1073706403 / (1073706403 + 1073777245).
+; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1073706403, i32 1073777245}
+
+; 10000%4 = 0
+; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0}
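
For reference, the constants checked above follow directly from the probabilities spelled out in the comments: each branch_weights pair sums to 2^31, so the first weight divided by 2^31 recovers the probability of the branch's true edge. A minimal Python sketch (illustrative only, not part of the patch; LLVM's internal fixed-point rounding can differ in the last digit or two):

# Sanity-check the branch weights quoted above against the stated probabilities.
# Each weight pair sums to 2**31, so w_true / 2**31 recovers the probability.
p = 9999 / 10000                        # original latch probability

pairs = {
    "unroll guard, 1 - p^3":            (644180, 2146839468),
    "unrolled latch, 1 - p^4":          (858864, 2146624784),
    "remainder guard, (p-p^4)/(1-p^4)": (1610532724, 536950924),
}
for name, (w_true, w_false) in pairs.items():
    print(name, w_true / (w_true + w_false))

print("1 - p^3         =", 1 - p**3)                   # ~0.00029997
print("1 - p^4         =", 1 - p**4)                   # ~0.00039994
print("(p-p^4)/(1-p^4) =", (p - p**4) / (1 - p**4))    # ~0.7499625
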
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index 492de06..ec7aba4 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -295,11 +295,12 @@ exit2.loopexit:
; COMMON-LABEL: {{^}}!0 =
; EPILOG: [[EPILOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11}
-; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127}
-; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1, i32 7}
-; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 3, i32 1}
+; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 326124004, i32 1821359644}
+; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1856428066, i32 291055582}
+; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 1597681585, i32 549802063}
-; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_LOOP_1:![0-9]+]]}
+; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_TC:![0-9]+]], [[EPILOG_LOOP_1:![0-9]+]]}
+; EPILOG: [[EPILOG_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3}
; EPILOG: [[EPILOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"}
; PROLOG: [[PROLOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11}
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
index 611ee5f..02f5bf9 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
@@ -3,14 +3,27 @@
@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
; CHECK-LABEL: @bar_prof
-; CHECK: loop:
-; CHECK: %mul = mul
-; CHECK: %mul.1 = mul
-; CHECK: %mul.2 = mul
-; CHECK: %mul.3 = mul
-; CHECK: br i1 %niter.ncmp.7, label %loop.end.unr-lcssa, label %loop, !prof [[PROF0:![0-9]+]]
-; CHECK: loop.epil:
-; CHECK: br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof [[PROF1:![0-9]+]], !llvm.loop {{![0-9]+}}
+; CHECK: entry:
+; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]], !prof ![[#PROF_UR_GUARD:]]
+; CHECK: [[ENTRY_NEW]]:
+; CHECK: br label %loop
+; CHECK: loop:
+; CHECK: %mul = mul
+; CHECK: %mul.1 = mul
+; CHECK: %mul.2 = mul
+; CHECK: %mul.3 = mul
+; CHECK: %mul.4 = mul
+; CHECK: %mul.5 = mul
+; CHECK: %mul.6 = mul
+; CHECK: %mul.7 = mul
+; CHECK-NOT: %mul.8 = mul
+; CHECK: br i1 %{{.*}}, label %[[LOOP_END_UNR_LCSSA:.*]], label %loop, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]
+; CHECK: [[LOOP_END_UNR_LCSSA]]:
+; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER]], label %loop.end, !prof ![[#PROF_RM_GUARD:]]
+; CHECK: [[LOOP_EPIL_PREHEADER]]:
+; CHECK: br label %[[LOOP_EPIL:.*]]
+; CHECK: [[LOOP_EPIL]]:
+; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL]], label %[[LOOP_END_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]
define i32 @bar_prof(ptr noalias nocapture readonly %src, i64 %c) !prof !1 {
entry:
br label %loop
@@ -60,5 +73,38 @@ loop.end:
!1 = !{!"function_entry_count", i64 1}
!2 = !{!"branch_weights", i32 1, i32 1000}
-; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 124}
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 1}
+; Original loop probability: p = 1000/(1+1000) = 0.999000999
+; Original estimated trip count: (1+1000)/1 = 1001
+; Unroll count: 8
+
+; Probability of >=7 iterations after first: p^7 = 0.993027916 =~
+; 2132511214 / (14972434 + 2132511214).
+; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 14972434, i32 2132511214}
+
+; Probability of >=8 more iterations: p^8 = 0.99203588 =~
+; 2130380833 / (17102815 + 2130380833).
+; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 17102815, i32 2130380833}
+
+; 1001//8 = 125
+; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]]}
+; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 125}
+
+; Probability of 1 to 7 remainder iterations (out of the possible 0 to 7):
+; (p-p^8)/(1-p^8) = 0.874562282 =~ 1878108210 / (1878108210 + 269375438).
+; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1878108210, i32 269375438}
+
+; Frequency of first remainder iter: r1 = 1
+; Frequency of second remainder iter: r2 = r1*(p-p^7)/(1-p^7) = 0.856714143
+; Frequency of third remainder iter: r3 = r2*(p-p^6)/(1-p^6) = 0.713571429
+; Frequency of fourth remainder iter: r4 = r3*(p-p^5)/(1-p^5) = 0.570571715
+; Frequency of fifth remainder iter: r5 = r4*(p-p^4)/(1-p^4) = 0.427714858
+; Frequency of sixth remainder iter: r6 = r5*(p-p^3)/(1-p^3) = 0.285000715
+; Frequency of seventh remainder iter: r7 = r6*(p-p^2)/(1-p^2) = 0.142429143
+; Solve for loop probability that produces that frequency: f = 1/(1-p') =>
+; p' = 1-1/f = 1-1/(r1+r2+r3+r4+r5+r6+r7) = 0.749749875 =~
+; 1610075606 / (1610075606 + 537408042).
+; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1610075606, i32 537408042}
+
+; Remainder estimated trip count: 1001%8 = 1
+; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1}
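
The remainder-latch weight above follows the same recipe the comments walk through: accumulate the relative frequency of each possible remainder iteration, then turn the total expected iteration count into a latch probability. A short Python sketch of that derivation (illustrative only, not part of the patch; the exact integers depend on LLVM's internal rounding):

# Remainder-loop latch probability for p = 1000/1001, unroll count 8.
p = 1000 / 1001
unroll = 8

freqs = [1.0]                          # r1: the first remainder iteration always runs
for k in range(unroll - 1, 1, -1):     # k = 7, 6, ..., 2
    # r_next = r_prev * (p - p^k) / (1 - p^k)
    freqs.append(freqs[-1] * (p - p**k) / (1 - p**k))

expected = sum(freqs)                  # ~3.996 expected remainder iterations
p_latch = 1 - 1 / expected             # ~0.74975
# The checked weights 1610075606 : 537408042 sum to 2**31 and encode ~p_latch.
print(expected, p_latch)
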
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index 7f34513..68cfc65 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -660,6 +660,114 @@ exit:
ret i32 %red
}
+
+define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
+; CHECK-LABEL: define i32 @test_or_reduction_with_stride_2(
+; CHECK-SAME: i32 [[SCALE:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[SCALE]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP13]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP17]], align 1
+; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP18]], align 1
+; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP21]], align 1
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP26]], align 1
+; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP27]], align 1
+; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP28]], align 1
+; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP31]], align 1
+; CHECK-NEXT: [[TMP48:%.*]] = insertelement <16 x i8> poison, i8 [[TMP32]], i32 0
+; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP33]], i32 1
+; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP34]], i32 2
+; CHECK-NEXT: [[TMP51:%.*]] = insertelement <16 x i8> [[TMP50]], i8 [[TMP35]], i32 3
+; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP36]], i32 4
+; CHECK-NEXT: [[TMP53:%.*]] = insertelement <16 x i8> [[TMP52]], i8 [[TMP37]], i32 5
+; CHECK-NEXT: [[TMP54:%.*]] = insertelement <16 x i8> [[TMP53]], i8 [[TMP38]], i32 6
+; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP54]], i8 [[TMP39]], i32 7
+; CHECK-NEXT: [[TMP56:%.*]] = insertelement <16 x i8> [[TMP55]], i8 [[TMP40]], i32 8
+; CHECK-NEXT: [[TMP57:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP41]], i32 9
+; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x i8> [[TMP57]], i8 [[TMP42]], i32 10
+; CHECK-NEXT: [[TMP59:%.*]] = insertelement <16 x i8> [[TMP58]], i8 [[TMP43]], i32 11
+; CHECK-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP59]], i8 [[TMP44]], i32 12
+; CHECK-NEXT: [[TMP61:%.*]] = insertelement <16 x i8> [[TMP60]], i8 [[TMP45]], i32 13
+; CHECK-NEXT: [[TMP62:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP46]], i32 14
+; CHECK-NEXT: [[TMP63:%.*]] = insertelement <16 x i8> [[TMP62]], i8 [[TMP47]], i32 15
+; CHECK-NEXT: [[TMP64:%.*]] = sext <16 x i8> [[TMP63]] to <16 x i32>
+; CHECK-NEXT: [[TMP65:%.*]] = mul <16 x i32> [[BROADCAST_SPLAT]], [[TMP64]]
+; CHECK-NEXT: [[TMP66]] = or <16 x i32> [[TMP65]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48
+; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP66]])
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %reduction = phi i32 [ %reduction.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr [32 x i8], ptr %src, i64 %iv
+ %load = load i8, ptr %gep, align 1
+ %sext = sext i8 %load to i32
+ %mul = mul i32 %scale, %sext
+ %reduction.next = or i32 %mul, %reduction
+ %iv.next = add i64 %iv, 2
+ %cmp = icmp eq i64 %iv.next, 100
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret i32 %reduction.next
+}
+
attributes #0 = { "target-cpu"="neoverse-512tvb" }
!0 = !{!1, !2, i64 0}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index d4004da..8081c0e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -64,39 +64,24 @@ exit:
define void @uniform_load_can_fold_users(ptr noalias %src, ptr %dst, i64 %start, double %d) {
; CHECK-LABEL: define void @uniform_load_can_fold_users(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[START:%.*]], double [[D:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[START]], 1
-; CHECK-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 0)
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]]
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[SRC]], align 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[BROADCAST_SPLAT]], splat (double 9.000000e+00)
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = fmul double [[TMP5]], 9.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = fdiv double [[TMP7]], [[D]]
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP4]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP12]], i64 [[TMP10]]
-; CHECK-NEXT: store double [[TMP8]], ptr [[TMP13]], align 8
; CHECK-NEXT: store double [[TMP8]], ptr [[TMP14]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[IV_1_NEXT]] = add i64 [[TMP4]], 1
+; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1
+; CHECK-NEXT: [[EC:%.*]] = icmp sgt i64 [[IV_2]], 0
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index eea2237..abed18a 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -380,7 +380,6 @@ define void @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP8]], 8589934588
-; CHECK-NEXT: [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP4]]
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX5_PROMOTED]], i64 0
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i32, ptr [[VAR2]], i64 [[TMP4]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -396,6 +395,7 @@ define void @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP4]]
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4, !alias.scope [[META27:![0-9]+]], !noalias [[META23]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
index 9a69982..70adac2 100644
--- a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
@@ -84,12 +84,8 @@ define void @single_scalar_cast_stored(ptr %src, ptr %dst, i32 %n) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2, !alias.scope [[META4:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i16> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[BROADCAST_SPLAT]], splat (i16 15)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[TMP0]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = and i16 [[TMP0]], 15
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP4]]
; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 291ada8..ef678ff 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -804,9 +804,9 @@ exit:
define i32 @print_mulacc_extended_const(ptr %start, ptr %end) {
; CHECK-LABEL: 'print_mulacc_extended_const'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
-; CHECK-NEXT: Live-in vp<%0> = VF
-; CHECK-NEXT: Live-in vp<%1> = VF * UF
-; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
+; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<%3> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -814,107 +814,84 @@ define i32 @print_mulacc_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
-; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1>
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
-; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
-; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
-; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
-; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i32), (ir<63> zext to i32))
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
+; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul (ir<%l> zext to i32), (ir<63> zext to i32))
+; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
-; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
-; CHECK-NEXT: No successors
-; CHECK-EMPTY:
-; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
-; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
-; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
-; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
-; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
-; CHECK-NEXT: IR %red.next = add i32 %red, %mul
-; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
-; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
-; CHECK-NEXT: Live-in ir<%1> = original trip-count
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
+ %red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
+ %l = load i8, ptr %ptr.iv, align 1
+ %l.ext = zext i8 %l to i32
+ %mul = mul i32 %l.ext, 63
+ %red.next = add i32 %red, %mul
+ %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+ %ec = icmp eq ptr %ptr.iv, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %red.next
+}
+
+define i32 @print_mulacc_extended_const_lhs(ptr %start, ptr %end) {
+; CHECK-LABEL: 'print_mulacc_extended_const_lhs'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
+; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
-; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
-; CHECK-NEXT: IR %0 = add i64 %end1, 1
-; CHECK-NEXT: IR %1 = sub i64 %0, %start2
-; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
-; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
-; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
-; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
-; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
-; CHECK-NEXT: Successor(s): vector.body
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
-; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = zext ir<%l> to i32
-; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<63>
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>)
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
-; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1>
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = zext ir<%l> to i32
+; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul ir<63>, ir<%l.ext>)
+; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block)
-; CHECK-NEXT: No successors
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<scalar.ph>:
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
-; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
-; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
-; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
-; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
-; CHECK-NEXT: IR %red.next = add i32 %red, %mul
-; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
-; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
entry:
br label %loop
@@ -923,7 +900,7 @@ loop:
%red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
%l = load i8, ptr %ptr.iv, align 1
%l.ext = zext i8 %l to i32
- %mul = mul i32 %l.ext, 63
+ %mul = mul i32 63, %l.ext
%red.next = add i32 %red, %mul
%gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv, %end
@@ -937,9 +914,9 @@ exit:
define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) {
; CHECK-LABEL: 'print_mulacc_not_extended_const'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
-; CHECK-NEXT: Live-in vp<%0> = VF
-; CHECK-NEXT: Live-in vp<%1> = VF * UF
-; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
+; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<%3> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -947,108 +924,30 @@ define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
-; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1>
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
-; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
-; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
-; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
-; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul ir<%l.ext>, ir<128>)
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul ir<%l.ext>, ir<128>)
+; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
-; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX:%.+]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
-; CHECK-NEXT: No successors
-; CHECK-EMPTY:
-; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
-; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
-; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
-; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
-; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
-; CHECK-NEXT: IR %red.next = add i32 %red, %mul
-; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
-; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
-; CHECK-NEXT: Live-in ir<%1> = original trip-count
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
-; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
-; CHECK-NEXT: IR %0 = add i64 %end1, 1
-; CHECK-NEXT: IR %1 = sub i64 %0, %start2
-; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
-; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
-; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
-; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
-; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
-; CHECK-NEXT: Successor(s): vector.body
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
-; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
-; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>)
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
-; CHECK-NEXT: Successor(s): middle.block, vector.body
-; CHECK-EMPTY:
-; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
-; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block)
-; CHECK-NEXT: No successors
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<scalar.ph>:
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
-; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
-; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
-; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
-; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
-; CHECK-NEXT: IR %red.next = add i32 %red, %mul
-; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
-; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
entry:
br label %loop
@@ -1071,9 +970,9 @@ exit:
define i64 @print_ext_mulacc_extended_const(ptr %start, ptr %end) {
; CHECK-LABEL: 'print_ext_mulacc_extended_const'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
-; CHECK-NEXT: Live-in vp<%0> = VF
-; CHECK-NEXT: Live-in vp<%1> = VF * UF
-; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
+; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<%3> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -1081,109 +980,29 @@ define i64 @print_ext_mulacc_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
-; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1>
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
-; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
-; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
-; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
-; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i64), (ir<63> zext to i64))
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
+; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul (ir<%l> zext to i64), (ir<63> zext to i64))
+; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
-; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
-; CHECK-NEXT: No successors
-; CHECK-EMPTY:
-; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
-; CHECK-NEXT: IR %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
-; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
-; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
-; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
-; CHECK-NEXT: IR %mul.ext = zext i32 %mul to i64
-; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext
-; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
-; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
-; CHECK-NEXT: Live-in ir<%1> = original trip-count
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
-; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
-; CHECK-NEXT: IR %0 = add i64 %end1, 1
-; CHECK-NEXT: IR %1 = sub i64 %0, %start2
-; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
-; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
-; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
-; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
-; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
-; CHECK-NEXT: Successor(s): vector.body
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
-; CHECK-NEXT: WIDEN-CAST vp<%4> = zext ir<%l> to i64
-; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%4>, ir<63>
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>)
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
-; CHECK-NEXT: Successor(s): middle.block, vector.body
-; CHECK-EMPTY:
-; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%6> = compute-reduction-result ir<%red>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
-; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%6> from middle.block)
-; CHECK-NEXT: No successors
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<scalar.ph>:
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%6>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
-; CHECK-NEXT: IR %red = phi i64 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
-; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
-; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
-; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
-; CHECK-NEXT: IR %mul.ext = zext i32 %mul to i64
-; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext
-; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
-; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
entry:
br label %loop
@@ -1207,9 +1026,9 @@ exit:
define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) {
; CHECK-LABEL: 'print_ext_mulacc_not_extended_const'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
-; CHECK-NEXT: Live-in vp<%0> = VF
-; CHECK-NEXT: Live-in vp<%1> = VF * UF
-; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
+; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: vp<%3> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -1217,112 +1036,31 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
-; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: vp<[[DER_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1>
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
-; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
-; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
-; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
-; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (ir<%mul> sext to i64)
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (ir<%mul> sext to i64)
+; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
-; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
-; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
-; CHECK-NEXT: No successors
-; CHECK-EMPTY:
-; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
-; CHECK-NEXT: IR %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
-; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
-; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
-; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
-; CHECK-NEXT: IR %mul.ext = sext i32 %mul to i64
-; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext
-; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
-; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
-; CHECK-NEXT: Live-in ir<%1> = original trip-count
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
-; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
-; CHECK-NEXT: IR %0 = add i64 %end1, 1
-; CHECK-NEXT: IR %1 = sub i64 %0, %start2
-; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
-; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
-; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
-; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
-; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
-; CHECK-NEXT: Successor(s): vector.body
-; CHECK-EMPTY:
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
-; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
-; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
-; CHECK-NEXT: WIDEN-CAST ir<%mul.ext> = sext ir<%mul> to i64
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul.ext>)
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
-; CHECK-NEXT: Successor(s): middle.block, vector.body
-; CHECK-EMPTY:
-; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block)
-; CHECK-NEXT: No successors
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<scalar.ph>:
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
-; CHECK-NEXT: IR %red = phi i64 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
-; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
-; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
-; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
-; CHECK-NEXT: IR %mul.ext = sext i32 %mul to i64
-; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext
-; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
-; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
entry:
br label %loop
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 940e30e..0c2e05f 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -1729,3 +1729,61 @@ define i32 @test_ret_only_capture() {
%v = load i32, ptr %a
ret i32 %v
}
+
+declare ptr @captures_address_only(ptr captures(address))
+
+; Can transform: Only one address captured.
+define void @test_captures_address_captures_none() {
+; CHECK-LABEL: define void @test_captures_address_captures_none() {
+; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]])
+; CHECK-NEXT: call void @use_nocapture(ptr [[SRC]])
+; CHECK-NEXT: ret void
+;
+ %src = alloca %struct.Foo, align 4
+ %dst = alloca %struct.Foo, align 4
+ store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+ call void @captures_address_only(ptr %src)
+ call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
+ call void @use_nocapture(ptr %dst)
+ ret void
+}
+
+; Can transform: Only one address captured.
+define void @test_captures_none_and_captures_address() {
+; CHECK-LABEL: define void @test_captures_none_and_captures_address() {
+; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: call void @use_nocapture(ptr [[SRC]])
+; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]])
+; CHECK-NEXT: ret void
+;
+ %src = alloca %struct.Foo, align 4
+ %dst = alloca %struct.Foo, align 4
+ store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+ call void @use_nocapture(ptr %src)
+ call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
+ call void @captures_address_only(ptr %dst)
+ ret void
+}
+
+; Cannot transform: Both addresses captured.
+define void @test_captures_address_and_captures_address() {
+; CHECK-LABEL: define void @test_captures_address_and_captures_address() {
+; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: [[DST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT: call void @captures_address_only(ptr [[DST]])
+; CHECK-NEXT: ret void
+;
+ %src = alloca %struct.Foo, align 4
+ %dst = alloca %struct.Foo, align 4
+ store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+ call void @captures_address_only(ptr %src)
+ call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
+ call void @captures_address_only(ptr %dst)
+ ret void
+}
diff --git a/llvm/test/Transforms/PGOProfile/memprof_diff_inline.ll b/llvm/test/Transforms/PGOProfile/memprof_diff_inline.ll
new file mode 100644
index 0000000..5213a07
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_diff_inline.ll
@@ -0,0 +1,118 @@
+;; Tests that the compiler ignores smaller contexts that differ only in the
+;; IsInlineFrame bool. These map to the same full context id internally, as we
+;; ignore the inline frame status which may differ in feedback compiles.
+;; Presumably this happens when profiles collected from different binaries are
+;; merged. If we didn't pick the largest, we would default them all to noncold.
+
+;; Avoid failures on big-endian systems that can't read the profile properly
+; REQUIRES: x86_64-linux
+
+;; Generate the profile and the IR.
+; RUN: split-file %s %t
+
+;; Generate indexed profile
+; RUN: llvm-profdata merge %t/memprof_diff_inline.yaml -o %t.memprofdata
+
+; RUN: opt < %t/memprof_diff_inline.ll -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-print-match-info 2>&1 | FileCheck %s --check-prefixes=MEMPROF
+
+; MEMPROF: MemProf notcold context with id 10194276560488437434 has total profiled size 200 is matched with 1 frames
+; MEMPROF: MemProf cold context with id 16342802530253093571 has total profiled size 10000 is matched with 1 frames
+
+;--- memprof_diff_inline.yaml
+---
+HeapProfileRecords:
+ - GUID: _Z3foov
+ AllocSites:
+ # Small non-cold, full context id 16342802530253093571, should ignore
+ - Callstack:
+ - { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 10
+ TotalLifetime: 0
+ TotalLifetimeAccessDensity: 20000
+ # Large cold, full context id 16342802530253093571, should keep
+ - Callstack:
+ - { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: true }
+ - { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 10000
+ TotalLifetime: 200000
+ TotalLifetimeAccessDensity: 0
+ # Small non-cold, full context id 16342802530253093571, should ignore
+ - Callstack:
+ - { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: true }
+ - { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 100
+ TotalLifetime: 0
+ TotalLifetimeAccessDensity: 20000
+ # Small non-cold, full context id 10194276560488437434
+ - Callstack:
+ - { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false }
+ - { Function: main, LineOffset: 9, Column: 13, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 200
+ TotalLifetime: 0
+ TotalLifetimeAccessDensity: 20000
+ CallSites: []
+...
+;--- memprof_diff_inline.ll
+; ModuleID = 'memprof_diff_inline.cc'
+source_filename = "memprof_diff_inline.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"struct.std::nothrow_t" = type { i8 }
+
+@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1
+
+define dso_local noundef ptr @_Z3foov() !dbg !10 {
+entry:
+ ; MEMPROF: call {{.*}} @_Znwm{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
+ %call = call noalias noundef align 32 ptr @_Znwm(i64 noundef 32) #6, !dbg !13
+ ret ptr %call
+}
+
+declare noundef ptr @_Znwm(i64 noundef)
+
+attributes #6 = { builtin allocsize(0) }
+
+; MEMPROF: ![[M1]] = !{![[MIB1:[0-9]+]], ![[MIB2:[0-9]+]]}
+
+; MEMPROF: ![[MIB1]] = !{![[STACK1:[0-9]+]], !"notcold", ![[CONTEXTSIZE1:[0-9]+]]}
+; MEMPROF: ![[STACK1]] = !{i64 2732490490862098848, i64 8467819354083268568, i64 9086428284934609951, i64 2061451396820446691}
+;; Full context id 10194276560488437434 == -8252467513221114182
+; MEMPROF: ![[CONTEXTSIZE1]] = !{i64 -8252467513221114182, i64 200}
+
+; MEMPROF: ![[MIB2]] = !{![[STACK2:[0-9]+]], !"cold", ![[CONTEXTSIZE2:[0-9]+]]}
+; MEMPROF: ![[STACK2]] = !{i64 2732490490862098848, i64 8467819354083268568, i64 9086428284934609951, i64 -5747251260480066785}
+;; Full context id 16342802530253093571 == -2103941543456458045
+;; We should have kept the large (cold) one.
+; MEMPROF: ![[CONTEXTSIZE2]] = !{i64 -2103941543456458045, i64 10000}
+
+; MEMPROF: ![[C1]] = !{i64 2732490490862098848}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof.cc", directory: "/usr/local/google/home/tejohnson/llvm/tmp", checksumkind: CSK_MD5, checksum: "e8c40ebe4b21776b4d60e9632cbc13c2")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!11 = !DISubroutineType(types: !12)
+!12 = !{}
+!13 = !DILocation(line: 5, column: 10, scope: !10)
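
As a rough model of what the checks above pin down (a sketch under assumptions, not LLVM's MemProf matcher): contexts whose frames differ only in IsInlineFrame collapse to one full context id, and among such duplicates the entry with the largest total profiled size is the one that survives. The names and data layout below are hypothetical.

    // Sketch only; not LLVM code.
    #include <cstdint>
    #include <unordered_map>

    struct ContextSummary {
      uint64_t TotalSize = 0; // 10, 100 and 10000 in the YAML above
      bool IsCold = false;    // derived from lifetime / access density
    };

    // FullId is computed from the frames with IsInlineFrame ignored, so the
    // three 16342802530253093571 contexts above all land on the same key.
    void recordContext(std::unordered_map<uint64_t, ContextSummary> &Kept,
                       uint64_t FullId, const ContextSummary &Candidate) {
      auto [It, Inserted] = Kept.try_emplace(FullId, Candidate);
      if (!Inserted && Candidate.TotalSize > It->second.TotalSize)
        It->second = Candidate; // the 10000-byte cold entry wins over 10 and 100
    }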
diff --git a/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll b/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll
index 2461ca3..ba53c57 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll
@@ -4,24 +4,50 @@
;; Avoid failures on big-endian systems that can't read the profile properly
; REQUIRES: x86_64-linux
-;; TODO: Use text profile inputs once that is available for memprof.
-;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
-;; # To generate below LLVM IR for use in matching.
-;; $ clang++ -gmlt -fdebug-info-for-profiling -S %S/Inputs/memprof_loop_unroll_b.cc -emit-llvm
+;; Generate the profile and the IR.
+; RUN: split-file %s %t
+
+;; Generate indexed profile
+; RUN: llvm-profdata merge %t/memprof_loop_unroll.yaml -o %t.memprofdata
-; RUN: llvm-profdata merge %S/Inputs/memprof_loop_unroll.memprofraw --profiled-binary %S/Inputs/memprof_loop_unroll.exe -o %t.memprofdata
;; Set the minimum lifetime threshold to 0 to ensure that one context is
;; considered cold (the other will be notcold).
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-ave-lifetime-cold-threshold=0 2>&1 | FileCheck %s
+; RUN: opt < %t/memprof_loop_unroll.ll -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-ave-lifetime-cold-threshold=0 2>&1 | FileCheck %s
-;; Conservatively annotate as not cold. We get two messages as there are two
-;; unrolled copies of the allocation.
-; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and indistinguishable alloc type notcold: 4
-; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and indistinguishable alloc type notcold: 4
+;; Conservatively annotate as not cold.
+; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and single alloc type notcold: 4
; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" }
; CHECK-NOT: stackIds: ()
+;--- memprof_loop_unroll.yaml
+---
+HeapProfileRecords:
+ - GUID: 0x7f8d88fcc70a347b
+ AllocSites:
+ - Callstack:
+ - { Function: 0x7f8d88fcc70a347b, LineOffset: 2, Column: 16, IsInlineFrame: false }
+ - { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 4
+ TotalLifetime: 2
+ TotalLifetimeAccessDensity: 12500000000
+ - Callstack:
+ - { Function: 0x7f8d88fcc70a347b, LineOffset: 2, Column: 16, IsInlineFrame: false }
+ - { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 4
+ TotalLifetime: 2
+ TotalLifetimeAccessDensity: 0
+ - GUID: 0xdb956436e78dd5fa
+ CallSites:
+ - Frames:
+ - { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false }
+...
+
+;--- memprof_loop_unroll.ll
; ModuleID = 'memprof_loop_unroll_b.cc'
source_filename = "memprof_loop_unroll_b.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
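
The rewritten memprof_loop_unroll test keeps the original point: both YAML entries share the same call stack, one looks cold (zero access density) and the other notcold, and the single allocation site has to receive one hint, so it is conservatively marked notcold. A tiny illustrative sketch of that merge, using a made-up enum rather than the real MemProf types:

    // Illustrative only; not the actual MemProf alloc-type merging code.
    enum class AllocType { NotCold, Cold };

    static AllocType mergeAllocTypes(AllocType A, AllocType B) {
      // Conservative: report cold only when every profiled entry agrees.
      return (A == AllocType::Cold && B == AllocType::Cold) ? AllocType::Cold
                                                            : AllocType::NotCold;
    }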
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
index 8d20a3b..d311f54 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
@@ -43,7 +43,6 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER13]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP8]], -8
-; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -64,6 +63,7 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
+; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER13]]
; CHECK: for.body.preheader14:
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
index 2dceb27..f2ae327 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
@@ -1040,7 +1040,6 @@ define void @saxpy_5(i64 %n, float %a, ptr readonly %x, ptr noalias %y) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_PREHEADER11:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775806
-; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 5
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[A]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <10 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1058,10 +1057,11 @@ define void @saxpy_5(i64 %n, float %a, ptr readonly %x, ptr noalias %y) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[N_VEC]], 5
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT]], label %[[LOOP_PREHEADER11]]
; CHECK: [[LOOP_PREHEADER11]]:
-; CHECK-NEXT: [[I1_PH:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[I1_PH:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[TMP16]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[LOOP:.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
index a3b8736..338d925 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
@@ -9,7 +9,6 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST_COERCE]], i64 2) ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST_COERCE]], i64 256) ]
-; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 256
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -27,6 +26,7 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 256
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[RETURN:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 true)
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
index 5127b7d..7c349fb 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
@@ -18,22 +18,15 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY_PREHEADER15:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8
-; CHECK-NEXT: [[IND_END:%.*]] = and i32 [[BLOCKSIZE]], 7
-; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[PSRCA:%.*]], i32 [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i32 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRCB:%.*]], i32 [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRCA]], i32 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRCA:%.*]], i32 [[OFFSET_IDX]]
; CHECK-NEXT: [[OFFSET_IDX13:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX13]]
+; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i32 [[OFFSET_IDX13]]
; CHECK-NEXT: [[OFFSET_IDX15:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[PSRCB]], i32 [[OFFSET_IDX15]]
+; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[PSRCB:%.*]], i32 [[OFFSET_IDX15]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
; CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i16>, ptr [[NEXT_GEP16]], align 2
@@ -47,6 +40,13 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
+; CHECK-NEXT: [[IND_END:%.*]] = and i32 [[BLOCKSIZE]], 7
+; CHECK-NEXT: [[TMP13:%.*]] = shl i32 [[N_VEC]], 1
+; CHECK-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[PSRCA]], i32 [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = shl i32 [[N_VEC]], 1
+; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP14]]
+; CHECK-NEXT: [[TMP12:%.*]] = shl i32 [[N_VEC]], 1
+; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRCB]], i32 [[TMP12]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[BLOCKSIZE]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER15]]
; CHECK: while.body.preheader15:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
index dcfebe3..6e95b63 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
@@ -46,7 +46,6 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 124
; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[BB12_PREHEADER11:%.*]], label [[VECTOR_PH:%.*]]
; AVX2: vector.ph:
-; AVX2-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP3]], 24
; AVX2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775776
; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX2: vector.body:
@@ -80,6 +79,7 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
; AVX2-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; AVX2: middle.block:
+; AVX2-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP3]], 24
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; AVX2-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; AVX2: vec.epilog.iter.check:
@@ -90,8 +90,6 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
; AVX2: vec.epilog.ph:
; AVX2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AVX2-NEXT: [[N_VEC10:%.*]] = and i64 [[TMP3]], 9223372036854775800
-; AVX2-NEXT: [[TMP21:%.*]] = shl i64 [[N_VEC10]], 2
-; AVX2-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP21]]
; AVX2-NEXT: br label [[BB12:%.*]]
; AVX2: vec.epilog.vector.body:
; AVX2-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[BB12_PREHEADER11]] ], [ [[INDEX_NEXT16:%.*]], [[BB12]] ]
@@ -106,6 +104,8 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
; AVX2-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC10]]
; AVX2-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[BB12]], !llvm.loop [[LOOP4:![0-9]+]]
; AVX2: vec.epilog.middle.block:
+; AVX2-NEXT: [[TMP27:%.*]] = shl i64 [[N_VEC10]], 2
+; AVX2-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]]
; AVX2-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]]
; AVX2-NEXT: br i1 [[CMP_N17]], label [[EXIT]], label [[BB12_PREHEADER1]]
; AVX2: bb12.preheader:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index bfb8554..4562072 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -16,8 +16,8 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-SAME: ptr writeonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], double [[A:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 0
-; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
-; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br i1 [[CMP1]], label %[[ITER_CHECK:.*]], label %[[FOR_END:.*]]
+; CHECK: [[ITER_CHECK]]:
; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X]] to i64
; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y]] to i64
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
@@ -25,12 +25,11 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
-; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_BODY_PREHEADER9:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i32 [[N]], 16
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]]
-; CHECK: [[VECTOR_PH1]]:
-; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 12
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
@@ -40,7 +39,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 64
@@ -65,13 +64,14 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 12
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER9]], label %[[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER]], label %[[VEC_EPILOG_PH]], !prof [[PROF10:![0-9]+]]
; CHECK: [[VEC_EPILOG_PH]]:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ]
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT14]], <4 x double> poison, <4 x i32> zeroinitializer
@@ -86,12 +86,12 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: store <4 x double> [[TMP40]], ptr [[TMP41]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX12]], 4
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC11]]
-; CHECK-NEXT: br i1 [[TMP42]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP42]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[N_VEC11]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[CMP_N17]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9]]
-; CHECK: [[FOR_BODY_PREHEADER9]]:
-; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br i1 [[CMP_N17]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[TMP43:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP43]], 7
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
@@ -110,13 +110,13 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[FOR_BODY_PROL_LOOPEXIT]]:
-; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ]
; CHECK-NEXT: [[TMP20:%.*]] = sub nsw i64 [[INDVARS_IV_PH]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[TMP20]], -8
-; CHECK-NEXT: br i1 [[TMP21]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9_NEW:.*]]
-; CHECK: [[FOR_BODY_PREHEADER9_NEW]]:
+; CHECK-NEXT: br i1 [[TMP21]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER_NEW:.*]]
+; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
; CHECK-NEXT: [[TMP22:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: [[TMP23:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: [[TMP24:%.*]] = fdiv fast double 1.000000e+00, [[A]]
@@ -127,7 +127,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP30:%.*]] = fmul fast double [[T0]], [[TMP22]]
@@ -177,7 +177,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
@@ -232,8 +232,9 @@ attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]}
; CHECK: [[META8]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META9]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]}
-; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
-; CHECK: [[META12]] = !{!"llvm.loop.unroll.disable"}
-; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META8]]}
+; CHECK: [[PROF10]] = !{!"branch_weights", i32 4, i32 12}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META13:![0-9]+]]}
+; CHECK: [[META13]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]]}
;.
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
index d16843c..6629b12 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
@@ -1,21 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
-define ptr @test(ptr %d) {
+define ptr @test(ptr %d, i64 %v) {
; CHECK-LABEL: define ptr @test(
-; CHECK-SAME: ptr [[D:%.*]]) {
+; CHECK-SAME: ptr [[D:%.*]], i64 [[V:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[D]], align 1
; CHECK-NEXT: [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, 0
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, 0
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 4
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, [[V]]
+; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, [[V]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4>
-; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0>
+; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 4, i64 3, i64 5, i64 4>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1
@@ -31,23 +31,23 @@ define ptr @test(ptr %d) {
; CHECK-NEXT: ret ptr [[TMP20]]
;
entry:
- %0 = load i8, ptr null, align 1
+ %0 = load i8, ptr %d, align 1
%cmp4.2 = icmp eq i8 %0, 0
- %1 = select i1 %cmp4.2, i64 0, i64 0
+ %1 = select i1 %cmp4.2, i64 0, i64 4
%2 = shl i64 %1, 1
%3 = getelementptr i8, ptr %d, i64 %2
- %4 = xor i64 0, 0
- %5 = udiv i64 %4, 0
+ %4 = xor i64 0, %v
+ %5 = udiv i64 %4, 3
%6 = mul i64 %5, 6
%7 = getelementptr i8, ptr %d, i64 %6
- %8 = shl i64 %1, 0
+ %8 = shl i64 %1, 2
%scevgep42 = getelementptr i8, ptr %d, i64 %8
- %9 = mul i64 %5, 1
+ %9 = mul i64 %5, 3
%10 = getelementptr i8, ptr %d, i64 %9
- %11 = udiv i64 1, 0
- %12 = mul i64 %11, 1
+ %11 = udiv i64 1, %v
+ %12 = mul i64 %11, 5
%13 = getelementptr i8, ptr %d, i64 %12
- %14 = mul i64 %11, 0
+ %14 = mul i64 %11, 4
%15 = getelementptr i8, ptr %d, i64 %14
ret ptr %15
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll
new file mode 100644
index 0000000..959b235
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define float @test(i8 %0) {
+; CHECK-LABEL: define float @test(
+; CHECK-SAME: i8 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> <i8 poison, i8 0>, i8 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 2, i32 27>
+; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP2]], <i32 2, i32 27>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: switch i32 [[TMP8]], label %[[EXIT:.*]] [
+; CHECK-NEXT: i32 0, label %[[EXIT]]
+; CHECK-NEXT: i32 1, label %[[EXIT]]
+; CHECK-NEXT: ]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret float 0.000000e+00
+;
+entry:
+ %1 = sext i8 0 to i32
+ %2 = lshr i32 %1, 27
+ %3 = sext i8 %0 to i32
+ %reass.add.epil = mul i32 %3, 2
+ %4 = or i32 %reass.add.epil, %2
+ switch i32 %4, label %exit [
+ i32 0, label %exit
+ i32 1, label %exit
+ ]
+
+exit:
+ ret float 0.000000e+00
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll
new file mode 100644
index 0000000..9cc417f
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll
@@ -0,0 +1,89 @@
+; RUN: split-file %s %t
+; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
+; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
+; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof
+; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof
+
+;--- main.ll
+declare i32 @a()
+declare i32 @b()
+
+define i32 @or(ptr %ptr, i1 %cond) !prof !0 {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %v1 = load i1, ptr %ptr
+ %cond_or = or i1 %v1, %cond
+ br i1 %cond_or, label %loop_a, label %loop_b, !prof !1
+
+loop_a:
+ call i32 @a()
+ br label %latch
+
+loop_b:
+ call i32 @b()
+ br label %latch
+
+latch:
+ %v2 = load i1, ptr %ptr
+ br i1 %v2, label %loop_begin, label %loop_exit, !prof !2
+
+loop_exit:
+ ret i32 0
+}
+
+define i32 @and(ptr %ptr, i1 %cond) !prof !0 {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %v1 = load i1, ptr %ptr
+ %cond_and = and i1 %v1, %cond
+ br i1 %cond_and, label %loop_a, label %loop_b, !prof !1
+
+loop_a:
+ call i32 @a()
+ br label %latch
+
+loop_b:
+ call i32 @b()
+ br label %latch
+
+latch:
+ %v2 = load i1, ptr %ptr
+ br i1 %v2, label %loop_begin, label %loop_exit, !prof !2
+
+loop_exit:
+ ret i32 0
+}
+
+;--- probable-or.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1, i32 1000}
+!2 = !{!"branch_weights", i32 5, i32 7}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond.fr = freeze i1 %cond
+; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond.fr = freeze i1 %cond
+; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK: !3 = !{!"unknown", !"simple-loop-unswitch"}
+
+;--- probable-and.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1000, i32 1}
+!2 = !{!"branch_weights", i32 5, i32 7}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond.fr = freeze i1 %cond
+; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond.fr = freeze i1 %cond
+; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
+; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
+; CHECK: !3 = !{!"branch_weights", i32 1000, i32 1}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll
index 0964c55..3760be4 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -simple-loop-unswitch-inject-invariant-conditions=true -passes='loop(simple-loop-unswitch<nontrivial>,loop-instsimplify)' -S | FileCheck %s
define void @test() {
@@ -7,7 +7,7 @@ define void @test() {
; CHECK-NEXT: [[TMP:%.*]] = call i1 @llvm.experimental.widenable.condition()
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8
-; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]]
+; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK: bb.split:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb3:
@@ -19,7 +19,7 @@ define void @test() {
; CHECK-NEXT: [[TMP6_US:%.*]] = phi i32 [ poison, [[BB3_SPLIT_US]] ]
; CHECK-NEXT: [[TMP7_US:%.*]] = add nuw nsw i32 [[TMP6_US]], 2
; CHECK-NEXT: [[TMP8_US:%.*]] = icmp ult i32 [[TMP7_US]], [[TMP2]]
-; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0]]
; CHECK: bb9.us:
; CHECK-NEXT: br label [[BB17_SPLIT_US:%.*]]
; CHECK: bb16.split.us:
@@ -96,3 +96,8 @@ declare i1 @llvm.experimental.widenable.condition()
!0 = !{!"branch_weights", i32 1048576, i32 1}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) }
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1}
+;.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll
new file mode 100644
index 0000000..ec6baa5
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll
@@ -0,0 +1,157 @@
+; RUN: split-file %s %t
+; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
+; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
+; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof
+; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof
+;
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-REF
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-CHK
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-REF
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-CHK
+
+;--- main.ll
+declare void @some_func() noreturn
+
+define i32 @or(i1 %cond1, i32 %var1) !prof !0 {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+ %cond2 = icmp eq i32 %var3, 10
+ %cond.or = or i1 %cond1, %cond2
+ br i1 %cond.or, label %loop_exit, label %do_something, !prof !1
+
+do_something:
+ %var2 = add i32 %var3, 1
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+define i32 @and(i1 %cond1, i32 %var1) !prof !0 {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+ %cond2 = icmp eq i32 %var3, 10
+ %cond.and = and i1 %cond1, %cond2
+ br i1 %cond.and, label %do_something, label %loop_exit, !prof !1
+
+do_something:
+ %var2 = add i32 %var3, 1
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+;--- probable-or.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK: !2 = !{!"unknown", !"simple-loop-unswitch"}
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10010
+ ; PROFILE-COM: - do_something: {{.*}} count = 10000
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-REF: - entry.split: {{.*}} count = 5
+ ; PROFILE-CHK: - entry.split: {{.*}} count = 10
+ ; PROFILE-REF: - loop_begin: {{.*}} count = 5005
+ ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
+ ; PROFILE-REF: - do_something: {{.*}} count = 5000
+ ; PROFILE-CHK: - do_something: {{.*}} count = 9990
+ ; PROFILE-REF: - loop_exit: {{.*}} count = 5
+ ; PROFILE-CHK: - loop_exit: {{.*}} count = 10
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - entry.split: {{.*}} count = 5
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 5
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 5
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
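+
+A quick check of the counts the PROFILE prefixes above expect, using only the metadata in this section (function_entry_count of 10 and, for @or, branch_weights of 1:1000 on the loop-exit test): each iteration leaves the loop with probability 1/1001, so before unswitching
+
+    loop_begin ≈ 10 × (1000 + 1) / 1 = 10010,  do_something ≈ 10 × 1000 = 10000,  loop_exit = 10.
+
+With -simple-loop-unswitch-estimate-profile=0 the frozen, unswitched condition is treated as an even split, so each clone's header sees roughly half of that (5005 / 5000 / 5, the PROFILE-REF lines); with =1 nearly all of the weight stays on the clone matching the likely branch (about 10000 / 9990 / 10, the PROFILE-CHK lines).
+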
+;--- probable-and.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1000, i32 1}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
+; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
+; CHECK: !2 = !{!"branch_weights", i32 1000, i32 1}
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}}, count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}}, count = 10
+ ; PROFILE-COM: - do_something: {{.*}}, count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}}, count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10010
+ ; PROFILE-COM: - do_something: {{.*}} count = 10000
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - entry.split: {{.*}} count = 5
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 5
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 5
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-REF: - entry.split: {{.*}} count = 5
+ ; PROFILE-CHK: - entry.split: {{.*}} count = 10
+ ; PROFILE-REF: - loop_begin: {{.*}} count = 5005
+ ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
+ ; PROFILE-REF: - do_something: {{.*}} count = 5000
+ ; PROFILE-CHK: - do_something: {{.*}} count = 9990
+ ; PROFILE-REF: - loop_exit: {{.*}} count = 5
+ ; PROFILE-CHK: - loop_exit: {{.*}} count = 10
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
index aa95b3f..d818335 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -1,8 +1,13 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
+;.
+; CHECK: @switch.table.switch_of_powers_two = private unnamed_addr constant [7 x i32] [i32 3, i32 poison, i32 poison, i32 2, i32 1, i32 0, i32 42], align 4
+; CHECK: @switch.table.switch_of_powers_two_default_reachable = private unnamed_addr constant [7 x i32] [i32 3, i32 5, i32 5, i32 2, i32 1, i32 0, i32 42], align 4
+; CHECK: @switch.table.switch_of_powers_two_default_reachable_multipreds = private unnamed_addr constant [7 x i32] [i32 3, i32 poison, i32 poison, i32 2, i32 1, i32 0, i32 42], align 4
+;.
define i32 @switch_of_powers_two(i32 %arg) {
; CHECK-LABEL: define i32 @switch_of_powers_two(
; CHECK-SAME: i32 [[ARG:%.*]]) {
@@ -35,17 +40,17 @@ return:
ret i32 %phi
}
-define i32 @switch_of_powers_two_default_reachable(i32 %arg) {
+define i32 @switch_of_powers_two_default_reachable(i32 %arg) !prof !0 {
; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable(
-; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-SAME: i32 [[ARG:%.*]]) !prof [[PROF0:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
-; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[ENTRY_SPLIT]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
-; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]], !prof [[PROF2:![0-9]+]]
; CHECK: [[SWITCH_LOOKUP]]:
; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]]
@@ -62,7 +67,7 @@ entry:
i32 16, label %bb3
i32 32, label %bb4
i32 64, label %bb5
- ]
+ ], !prof !1
default_case: br label %return
bb1: br label %return
@@ -128,3 +133,13 @@ return:
%phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ]
ret i32 %phi
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 10, i32 5, i32 7, i32 11, i32 13, i32 17}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 58, i32 5}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 56, i32 5}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/pr165301.ll b/llvm/test/Transforms/SimplifyCFG/pr165301.ll
new file mode 100644
index 0000000..1df6552
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/pr165301.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
+; RUN: opt -S -passes="simplifycfg<switch-range-to-icmp>" < %s | FileCheck %s
+
+; Make sure there's no use-after-free when removing incoming values from PHI nodes.
+
+define i32 @pr165301(i1 %cond) !prof !0 {
+; CHECK-LABEL: define i32 @pr165301(
+; CHECK-SAME: i1 [[COND:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[SWITCHBB:.*]]
+; CHECK: [[SWITCHBB]]:
+; CHECK-NEXT: br label %[[SWITCHBB]]
+;
+entry:
+ br label %switchbb
+
+switchbb:
+ switch i1 %cond, label %default [
+ i1 false, label %switchbb
+ i1 true, label %switchbb
+ ], !prof !1
+
+default:
+ %phi.lcssa = phi i32 [ 0, %switchbb ]
+ ret i32 %phi.lcssa
+}
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3, i32 5}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+;.
diff --git a/llvm/test/Transforms/UnifyLoopExits/basic.ll b/llvm/test/Transforms/UnifyLoopExits/basic.ll
index ccd15d4..d04d142 100644
--- a/llvm/test/Transforms/UnifyLoopExits/basic.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/basic.ll
@@ -18,12 +18,12 @@ define void @loop_1(i1 %PredEntry, i1 %PredB, i1 %PredC, i1 %PredD) {
; CHECK: F:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: G:
-; CHECK-NEXT: br label [[F:%.*]]
+; CHECK-NEXT: br label [[Y:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: loop.exit.guard:
-; CHECK-NEXT: [[GUARD_E:%.*]] = phi i1 [ true, [[B]] ], [ false, [[C]] ], [ false, [[D]] ]
-; CHECK-NEXT: br i1 [[GUARD_E]], label [[E:%.*]], label [[F]]
+; CHECK-NEXT: [[GUARD_X:%.*]] = phi i1 [ true, [[B]] ], [ false, [[C]] ], [ false, [[D]] ]
+; CHECK-NEXT: br i1 [[GUARD_X]], label [[X:%.*]], label [[Y]]
;
entry:
br i1 %PredEntry, label %A, label %G
@@ -53,6 +53,67 @@ exit:
ret void
}
+define void @loop_1_callbr(i1 %PredEntry, i1 %PredB, i1 %PredC, i1 %PredD) {
+; CHECK-LABEL: @loop_1_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[G:%.*]]
+; CHECK: A:
+; CHECK-NEXT: br label [[B:%.*]]
+; CHECK: B:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; CHECK-NEXT: to label [[C:%.*]] [label %B.target.E]
+; CHECK: C:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDC:%.*]])
+; CHECK-NEXT: to label [[D:%.*]] [label %C.target.F]
+; CHECK: D:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDD:%.*]])
+; CHECK-NEXT: to label [[A]] [label %D.target.F]
+; CHECK: E:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: F:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: G:
+; CHECK-NEXT: br label [[Y:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: B.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.F:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: D.target.F:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[GUARD_X:%.*]] = phi i1 [ true, [[B_TARGET_E:%.*]] ], [ false, [[C_TARGET_F:%.*]] ], [ false, [[D_TARGET_F:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_X]], label [[X:%.*]], label [[Y]]
+;
+entry:
+ br i1 %PredEntry, label %A, label %G
+
+A:
+ br label %B
+
+B:
+ callbr void asm "", "r,!i"(i1 %PredB) to label %C [label %E]
+
+C:
+ callbr void asm "", "r,!i"(i1 %PredC) to label %D [label %F]
+
+D:
+ callbr void asm "", "r,!i"(i1 %PredD) to label %A [label %F]
+
+E:
+ br label %exit
+
+F:
+ br label %exit
+
+G:
+ br label %F
+
+exit:
+ ret void
+}
+
define void @loop_2(i1 %PredA, i1 %PredB, i1 %PredC) {
; CHECK-LABEL: @loop_2(
; CHECK-NEXT: entry:
@@ -107,3 +168,67 @@ Z:
exit:
ret void
}
+
+define void @loop_2_callbr(i1 %PredA, i1 %PredB, i1 %PredC) {
+; CHECK-LABEL: @loop_2_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: A:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.X]
+; CHECK: B:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; CHECK-NEXT: to label [[C:%.*]] [label %B.target.Y]
+; CHECK: C:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDC:%.*]])
+; CHECK-NEXT: to label [[D:%.*]] [label %C.target.Z]
+; CHECK: D:
+; CHECK-NEXT: br label [[A]]
+; CHECK: X:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: Y:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: Z:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A.target.X:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: B.target.Y:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: C.target.Z:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[GUARD_X:%.*]] = phi i1 [ true, [[A_TARGET_X:%.*]] ], [ false, [[B_TARGET_Y:%.*]] ], [ false, [[C_TARGET_Z:%.*]] ]
+; CHECK-NEXT: [[GUARD_Y:%.*]] = phi i1 [ false, [[A_TARGET_X]] ], [ true, [[B_TARGET_Y]] ], [ false, [[C_TARGET_Z]] ]
+; CHECK-NEXT: br i1 [[GUARD_X]], label [[X:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: br i1 [[GUARD_Y]], label [[Y:%.*]], label [[Z:%.*]]
+;
+entry:
+ br label %A
+
+A:
+ callbr void asm "", "r,!i"(i1 %PredA) to label %B [label %X]
+
+B:
+ callbr void asm "", "r,!i"(i1 %PredB) to label %C [label %Y]
+
+C:
+ callbr void asm "", "r,!i"(i1 %PredC) to label %D [label %Z]
+
+D:
+ br label %A
+
+X:
+ br label %exit
+
+Y:
+ br label %exit
+
+Z:
+ br label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/UnifyLoopExits/integer_guards.ll b/llvm/test/Transforms/UnifyLoopExits/integer_guards.ll
index f55639f..be982d5 100644
--- a/llvm/test/Transforms/UnifyLoopExits/integer_guards.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/integer_guards.ll
@@ -71,6 +71,85 @@ E:
ret void
}
+define void @loop_two_exits_callbr(i1 %PredEntry, i1 %PredA) {
+; CHECK-LABEL: @loop_two_exits_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[E:%.*]]
+; CHECK: A:
+; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[C:%.*]] ]
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; CHECK-NEXT: to label [[A_TARGET_B:%.*]] [label %C]
+; CHECK: B:
+; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; CHECK-NEXT: br label [[D:%.*]]
+; CHECK: C:
+; CHECK-NEXT: [[INC2]] = add i32 [[INC1]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC2]], 10
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %C.target.E]
+; CHECK: D:
+; CHECK-NEXT: unreachable
+; CHECK: E:
+; CHECK-NEXT: ret void
+; CHECK: A.target.B:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MERGED_BB_IDX:%.*]] = phi i32 [ 0, [[A_TARGET_B]] ], [ 1, [[C_TARGET_E:%.*]] ]
+; CHECK-NEXT: [[B_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 0
+; CHECK-NEXT: br i1 [[B_PREDICATE]], label [[B:%.*]], label [[E]]
+;
+; BOOLEAN-LABEL: @loop_two_exits_callbr(
+; BOOLEAN-NEXT: entry:
+; BOOLEAN-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[E:%.*]]
+; BOOLEAN: A:
+; BOOLEAN-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[C:%.*]] ]
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; BOOLEAN-NEXT: to label [[A_TARGET_B:%.*]] [label %C]
+; BOOLEAN: B:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[D:%.*]]
+; BOOLEAN: C:
+; BOOLEAN-NEXT: [[INC2]] = add i32 [[INC1]], 1
+; BOOLEAN-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC2]], 10
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; BOOLEAN-NEXT: to label [[A]] [label %C.target.E]
+; BOOLEAN: D:
+; BOOLEAN-NEXT: unreachable
+; BOOLEAN: E:
+; BOOLEAN-NEXT: ret void
+; BOOLEAN: A.target.B:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; BOOLEAN: C.target.E:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: loop.exit.guard:
+; BOOLEAN-NEXT: [[GUARD_B:%.*]] = phi i1 [ true, [[A_TARGET_B]] ], [ false, [[C_TARGET_E:%.*]] ]
+; BOOLEAN-NEXT: br i1 [[GUARD_B]], label [[B:%.*]], label [[E]]
+;
+entry:
+ br i1 %PredEntry, label %A, label %E
+
+A:
+ %inc1 = phi i32 [ 0, %entry ], [ %inc2, %C ]
+ callbr void asm "", "r,!i"(i1 %PredA) to label %B [label %C]
+
+B:
+ tail call fastcc void @check(i32 1) #0
+ br label %D
+
+C:
+ %inc2 = add i32 %inc1, 1
+ %cmp = icmp ult i32 %inc2, 10
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %E]
+
+D:
+ unreachable
+
+E:
+ ret void
+}
+
; The loop exit blocks appear in an inner loop.
define void @inner_loop(i1 %PredEntry, i1 %PredA, i1 %PredB) {
@@ -196,6 +275,164 @@ I:
ret void
}
+define void @inner_loop_callbr(i1 %PredEntry, i1 %PredA, i1 %PredB) {
+; CHECK-LABEL: @inner_loop_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[I:%.*]]
+; CHECK: A:
+; CHECK-NEXT: [[OUTER1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OUTER2:%.*]], [[G:%.*]] ]
+; CHECK-NEXT: br label [[B:%.*]]
+; CHECK: B:
+; CHECK-NEXT: [[INNER1:%.*]] = phi i32 [ 0, [[A]] ], [ [[INNER2:%.*]], [[F:%.*]] ]
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; CHECK-NEXT: to label [[D:%.*]] [label %B.target.B.target.C]
+; CHECK: C:
+; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; CHECK-NEXT: br label [[H:%.*]]
+; CHECK: D:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; CHECK-NEXT: to label [[D_TARGET_D_TARGET_E:%.*]] [label %F]
+; CHECK: E:
+; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
+; CHECK-NEXT: br label [[H]]
+; CHECK: F:
+; CHECK-NEXT: [[INNER2]] = add i32 [[INNER1]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[INNER2]], 20
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B]] [label %F.target.G]
+; CHECK: G:
+; CHECK-NEXT: [[OUTER2]] = add i32 [[OUTER1]], 1
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[OUTER2]], 10
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP2]])
+; CHECK-NEXT: to label [[A]] [label %G.target.I]
+; CHECK: H:
+; CHECK-NEXT: unreachable
+; CHECK: I:
+; CHECK-NEXT: ret void
+; CHECK: B.target.C:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: D.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: G.target.I:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MERGED_BB_IDX:%.*]] = phi i32 [ 0, [[B_TARGET_C:%.*]] ], [ 1, [[D_TARGET_E:%.*]] ], [ 2, [[G_TARGET_I:%.*]] ]
+; CHECK-NEXT: [[C_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 0
+; CHECK-NEXT: br i1 [[C_PREDICATE]], label [[C:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: [[E_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 1
+; CHECK-NEXT: br i1 [[E_PREDICATE]], label [[E:%.*]], label [[I]]
+; CHECK: B.target.B.target.C:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD2:%.*]]
+; CHECK: D.target.D.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD2]]
+; CHECK: F.target.G:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD2]]
+; CHECK: loop.exit.guard2:
+; CHECK-NEXT: [[MERGED_BB_IDX4:%.*]] = phi i32 [ 0, [[B_TARGET_B_TARGET_C:%.*]] ], [ 1, [[D_TARGET_D_TARGET_E]] ], [ 2, [[F_TARGET_G:%.*]] ]
+; CHECK-NEXT: [[B_TARGET_C_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX4]], 0
+; CHECK-NEXT: br i1 [[B_TARGET_C_PREDICATE]], label [[B_TARGET_C]], label [[LOOP_EXIT_GUARD3:%.*]]
+; CHECK: loop.exit.guard3:
+; CHECK-NEXT: [[D_TARGET_E_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX4]], 1
+; CHECK-NEXT: br i1 [[D_TARGET_E_PREDICATE]], label [[D_TARGET_E]], label [[G]]
+;
+; BOOLEAN-LABEL: @inner_loop_callbr(
+; BOOLEAN-NEXT: entry:
+; BOOLEAN-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[I:%.*]]
+; BOOLEAN: A:
+; BOOLEAN-NEXT: [[OUTER1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OUTER2:%.*]], [[G:%.*]] ]
+; BOOLEAN-NEXT: br label [[B:%.*]]
+; BOOLEAN: B:
+; BOOLEAN-NEXT: [[INNER1:%.*]] = phi i32 [ 0, [[A]] ], [ [[INNER2:%.*]], [[F:%.*]] ]
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; BOOLEAN-NEXT: to label [[D:%.*]] [label %B.target.B.target.C]
+; BOOLEAN: C:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[H:%.*]]
+; BOOLEAN: D:
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; BOOLEAN-NEXT: to label [[D_TARGET_D_TARGET_E:%.*]] [label %F]
+; BOOLEAN: E:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[H]]
+; BOOLEAN: F:
+; BOOLEAN-NEXT: [[INNER2]] = add i32 [[INNER1]], 1
+; BOOLEAN-NEXT: [[CMP1:%.*]] = icmp ult i32 [[INNER2]], 20
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; BOOLEAN-NEXT: to label [[B]] [label %F.target.G]
+; BOOLEAN: G:
+; BOOLEAN-NEXT: [[OUTER2]] = add i32 [[OUTER1]], 1
+; BOOLEAN-NEXT: [[CMP2:%.*]] = icmp ult i32 [[OUTER2]], 10
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[CMP2]])
+; BOOLEAN-NEXT: to label [[A]] [label %G.target.I]
+; BOOLEAN: H:
+; BOOLEAN-NEXT: unreachable
+; BOOLEAN: I:
+; BOOLEAN-NEXT: ret void
+; BOOLEAN: B.target.C:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; BOOLEAN: D.target.E:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: G.target.I:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: loop.exit.guard:
+; BOOLEAN-NEXT: [[GUARD_C:%.*]] = phi i1 [ true, [[B_TARGET_C:%.*]] ], [ false, [[D_TARGET_E:%.*]] ], [ false, [[G_TARGET_I:%.*]] ]
+; BOOLEAN-NEXT: [[GUARD_E:%.*]] = phi i1 [ false, [[B_TARGET_C]] ], [ true, [[D_TARGET_E]] ], [ false, [[G_TARGET_I]] ]
+; BOOLEAN-NEXT: br i1 [[GUARD_C]], label [[C:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; BOOLEAN: loop.exit.guard1:
+; BOOLEAN-NEXT: br i1 [[GUARD_E]], label [[E:%.*]], label [[I]]
+; BOOLEAN: B.target.B.target.C:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD2:%.*]]
+; BOOLEAN: D.target.D.target.E:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD2]]
+; BOOLEAN: F.target.G:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD2]]
+; BOOLEAN: loop.exit.guard2:
+; BOOLEAN-NEXT: [[GUARD_B_TARGET_C:%.*]] = phi i1 [ true, [[B_TARGET_B_TARGET_C:%.*]] ], [ false, [[D_TARGET_D_TARGET_E]] ], [ false, [[F_TARGET_G:%.*]] ]
+; BOOLEAN-NEXT: [[GUARD_D_TARGET_E:%.*]] = phi i1 [ false, [[B_TARGET_B_TARGET_C]] ], [ true, [[D_TARGET_D_TARGET_E]] ], [ false, [[F_TARGET_G]] ]
+; BOOLEAN-NEXT: br i1 [[GUARD_B_TARGET_C]], label [[B_TARGET_C]], label [[LOOP_EXIT_GUARD3:%.*]]
+; BOOLEAN: loop.exit.guard3:
+; BOOLEAN-NEXT: br i1 [[GUARD_D_TARGET_E]], label [[D_TARGET_E]], label [[G]]
+;
+entry:
+ br i1 %PredEntry, label %A, label %I
+
+A:
+ %outer1 = phi i32 [ 0, %entry ], [ %outer2, %G ]
+ br label %B
+
+B:
+ %inner1 = phi i32 [ 0, %A ], [ %inner2, %F ]
+ callbr void asm "", "r,!i"(i1 %PredA) to label %D [label %C]
+
+C:
+ tail call fastcc void @check(i32 1) #0
+ br label %H
+
+D:
+ callbr void asm "", "r,!i"(i1 %PredB) to label %E [label %F]
+
+E:
+ tail call fastcc void @check(i32 2) #0
+ br label %H
+
+F:
+ %inner2 = add i32 %inner1, 1
+ %cmp1 = icmp ult i32 %inner2, 20
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %G]
+
+G:
+ %outer2 = add i32 %outer1, 1
+ %cmp2 = icmp ult i32 %outer2, 10
+ callbr void asm "", "r,!i"(i1 %cmp2) to label %A [label %I]
+
+H:
+ unreachable
+
+I:
+ ret void
+}
+
; A loop with more exit blocks.
define void @loop_five_exits(i1 %PredEntry, i1 %PredA, i1 %PredB, i1 %PredC, i1 %PredD) {
@@ -341,6 +578,179 @@ L:
ret void
}
+define void @loop_five_exits_callbr(i1 %PredEntry, i1 %PredA, i1 %PredB, i1 %PredC, i1 %PredD) {
+; CHECK-LABEL: @loop_five_exits_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[L:%.*]]
+; CHECK: A:
+; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[I:%.*]] ]
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; CHECK-NEXT: to label [[A_TARGET_B:%.*]] [label %C]
+; CHECK: B:
+; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; CHECK-NEXT: br label [[J:%.*]]
+; CHECK: C:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; CHECK-NEXT: to label [[C_TARGET_D:%.*]] [label %E]
+; CHECK: D:
+; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
+; CHECK-NEXT: br label [[J]]
+; CHECK: E:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDC:%.*]])
+; CHECK-NEXT: to label [[E_TARGET_F:%.*]] [label %G]
+; CHECK: F:
+; CHECK-NEXT: tail call fastcc void @check(i32 3) #[[ATTR0]]
+; CHECK-NEXT: br label [[K:%.*]]
+; CHECK: G:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDD:%.*]])
+; CHECK-NEXT: to label [[G_TARGET_H:%.*]] [label %I]
+; CHECK: H:
+; CHECK-NEXT: tail call fastcc void @check(i32 4) #[[ATTR0]]
+; CHECK-NEXT: br label [[K]]
+; CHECK: I:
+; CHECK-NEXT: [[INC2]] = add i32 [[INC1]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC2]], 10
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %I.target.L]
+; CHECK: J:
+; CHECK-NEXT: br label [[L]]
+; CHECK: K:
+; CHECK-NEXT: br label [[L]]
+; CHECK: L:
+; CHECK-NEXT: ret void
+; CHECK: A.target.B:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.D:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: E.target.F:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: G.target.H:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: I.target.L:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MERGED_BB_IDX:%.*]] = phi i32 [ 0, [[A_TARGET_B]] ], [ 1, [[C_TARGET_D]] ], [ 2, [[E_TARGET_F]] ], [ 3, [[G_TARGET_H]] ], [ 4, [[I_TARGET_L:%.*]] ]
+; CHECK-NEXT: [[B_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 0
+; CHECK-NEXT: br i1 [[B_PREDICATE]], label [[B:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: [[D_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 1
+; CHECK-NEXT: br i1 [[D_PREDICATE]], label [[D:%.*]], label [[LOOP_EXIT_GUARD2:%.*]]
+; CHECK: loop.exit.guard2:
+; CHECK-NEXT: [[F_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 2
+; CHECK-NEXT: br i1 [[F_PREDICATE]], label [[F:%.*]], label [[LOOP_EXIT_GUARD3:%.*]]
+; CHECK: loop.exit.guard3:
+; CHECK-NEXT: [[H_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 3
+; CHECK-NEXT: br i1 [[H_PREDICATE]], label [[H:%.*]], label [[L]]
+;
+; BOOLEAN-LABEL: @loop_five_exits_callbr(
+; BOOLEAN-NEXT: entry:
+; BOOLEAN-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[L:%.*]]
+; BOOLEAN: A:
+; BOOLEAN-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[I:%.*]] ]
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; BOOLEAN-NEXT: to label [[A_TARGET_B:%.*]] [label %C]
+; BOOLEAN: B:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[J:%.*]]
+; BOOLEAN: C:
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; BOOLEAN-NEXT: to label [[C_TARGET_D:%.*]] [label %E]
+; BOOLEAN: D:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[J]]
+; BOOLEAN: E:
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDC:%.*]])
+; BOOLEAN-NEXT: to label [[E_TARGET_F:%.*]] [label %G]
+; BOOLEAN: F:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 3) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[K:%.*]]
+; BOOLEAN: G:
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDD:%.*]])
+; BOOLEAN-NEXT: to label [[G_TARGET_H:%.*]] [label %I]
+; BOOLEAN: H:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 4) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[K]]
+; BOOLEAN: I:
+; BOOLEAN-NEXT: [[INC2]] = add i32 [[INC1]], 1
+; BOOLEAN-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC2]], 10
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; BOOLEAN-NEXT: to label [[A]] [label %I.target.L]
+; BOOLEAN: J:
+; BOOLEAN-NEXT: br label [[L]]
+; BOOLEAN: K:
+; BOOLEAN-NEXT: br label [[L]]
+; BOOLEAN: L:
+; BOOLEAN-NEXT: ret void
+; BOOLEAN: A.target.B:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; BOOLEAN: C.target.D:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: E.target.F:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: G.target.H:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: I.target.L:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: loop.exit.guard:
+; BOOLEAN-NEXT: [[GUARD_B:%.*]] = phi i1 [ true, [[A_TARGET_B]] ], [ false, [[C_TARGET_D]] ], [ false, [[E_TARGET_F]] ], [ false, [[G_TARGET_H]] ], [ false, [[I_TARGET_L:%.*]] ]
+; BOOLEAN-NEXT: [[GUARD_D:%.*]] = phi i1 [ false, [[A_TARGET_B]] ], [ true, [[C_TARGET_D]] ], [ false, [[E_TARGET_F]] ], [ false, [[G_TARGET_H]] ], [ false, [[I_TARGET_L]] ]
+; BOOLEAN-NEXT: [[GUARD_F:%.*]] = phi i1 [ false, [[A_TARGET_B]] ], [ false, [[C_TARGET_D]] ], [ true, [[E_TARGET_F]] ], [ false, [[G_TARGET_H]] ], [ false, [[I_TARGET_L]] ]
+; BOOLEAN-NEXT: [[GUARD_H:%.*]] = phi i1 [ false, [[A_TARGET_B]] ], [ false, [[C_TARGET_D]] ], [ false, [[E_TARGET_F]] ], [ true, [[G_TARGET_H]] ], [ false, [[I_TARGET_L]] ]
+; BOOLEAN-NEXT: br i1 [[GUARD_B]], label [[B:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; BOOLEAN: loop.exit.guard1:
+; BOOLEAN-NEXT: br i1 [[GUARD_D]], label [[D:%.*]], label [[LOOP_EXIT_GUARD2:%.*]]
+; BOOLEAN: loop.exit.guard2:
+; BOOLEAN-NEXT: br i1 [[GUARD_F]], label [[F:%.*]], label [[LOOP_EXIT_GUARD3:%.*]]
+; BOOLEAN: loop.exit.guard3:
+; BOOLEAN-NEXT: br i1 [[GUARD_H]], label [[H:%.*]], label [[L]]
+;
+entry:
+ br i1 %PredEntry, label %A, label %L
+
+A:
+ %inc1 = phi i32 [ 0, %entry ], [ %inc2, %I ]
+ callbr void asm "", "r,!i"(i1 %PredA) to label %B [label %C]
+
+B:
+ tail call fastcc void @check(i32 1) #0
+ br label %J
+
+C:
+ callbr void asm "", "r,!i"(i1 %PredB) to label %D [label %E]
+
+D:
+ tail call fastcc void @check(i32 2) #0
+ br label %J
+
+E:
+ callbr void asm "", "r,!i"(i1 %PredC) to label %F [label %G]
+
+F:
+ tail call fastcc void @check(i32 3) #0
+ br label %K
+
+G:
+ callbr void asm "", "r,!i"(i1 %PredD) to label %H [label %I]
+
+H:
+ tail call fastcc void @check(i32 4) #0
+ br label %K
+
+I:
+ %inc2 = add i32 %inc1, 1
+ %cmp = icmp ult i32 %inc2, 10
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %L]
+
+J:
+ br label %L
+
+K:
+ br label %L
+
+L:
+ ret void
+}
+
declare void @check(i32 noundef %i) #0
diff --git a/llvm/test/Transforms/UnifyLoopExits/nested.ll b/llvm/test/Transforms/UnifyLoopExits/nested.ll
index 8fae2c4..2ec576a 100644
--- a/llvm/test/Transforms/UnifyLoopExits/nested.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/nested.ll
@@ -78,3 +78,145 @@ exit:
%exit.phi = phi i32 [%A4.phi, %A5], [%Z, %C]
ret void
}
+
+define void @nested_callbr(i1 %PredB3, i1 %PredB4, i1 %PredA4, i1 %PredA3, i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @nested_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[A1:%.*]]
+; CHECK: A1:
+; CHECK-NEXT: br label [[B1:%.*]]
+; CHECK: B1:
+; CHECK-NEXT: br label [[B2:%.*]]
+; CHECK: B2:
+; CHECK-NEXT: [[X_INC:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: br label [[B3:%.*]]
+; CHECK: B3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB3:%.*]])
+; CHECK-NEXT: to label [[B4:%.*]] [label %B3.target.A3]
+; CHECK: B4:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB4:%.*]])
+; CHECK-NEXT: to label [[B1]] [label %B4.target.A2]
+; CHECK: A2:
+; CHECK-NEXT: br label [[A4:%.*]]
+; CHECK: A3:
+; CHECK-NEXT: br label [[A4]]
+; CHECK: A4:
+; CHECK-NEXT: [[A4_PHI:%.*]] = phi i32 [ [[Y:%.*]], [[A3:%.*]] ], [ [[X_INC_MOVED:%.*]], [[A2:%.*]] ]
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA4:%.*]])
+; CHECK-NEXT: to label [[A4_TARGET_C:%.*]] [label %A5]
+; CHECK: A5:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA3:%.*]])
+; CHECK-NEXT: to label [[A5_TARGET_EXIT:%.*]] [label %A1]
+; CHECK: C:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[EXIT_PHI:%.*]] = phi i32 [ [[Z:%.*]], [[C:%.*]] ], [ [[EXIT_PHI_MOVED:%.*]], [[LOOP_EXIT_GUARD:%.*]] ]
+; CHECK-NEXT: ret void
+; CHECK: A4.target.C:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: A5.target.exit:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[EXIT_PHI_MOVED]] = phi i32 [ poison, [[A4_TARGET_C]] ], [ [[A4_PHI]], [[A5_TARGET_EXIT]] ]
+; CHECK-NEXT: [[GUARD_C:%.*]] = phi i1 [ true, [[A4_TARGET_C]] ], [ false, [[A5_TARGET_EXIT]] ]
+; CHECK-NEXT: br i1 [[GUARD_C]], label [[C]], label [[EXIT]]
+; CHECK: B3.target.A3:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: B4.target.A2:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD1]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: [[X_INC_MOVED]] = phi i32 [ [[X_INC]], [[B3_TARGET_A3:%.*]] ], [ [[X_INC]], [[B4_TARGET_A2:%.*]] ]
+; CHECK-NEXT: [[GUARD_A3:%.*]] = phi i1 [ true, [[B3_TARGET_A3]] ], [ false, [[B4_TARGET_A2]] ]
+; CHECK-NEXT: br i1 [[GUARD_A3]], label [[A3]], label [[A2]]
+;
+entry:
+ br label %A1
+
+A1:
+ br label %B1
+
+B1:
+ br label %B2
+
+B2:
+ %X.inc = add i32 %X, 1
+ br label %B3
+
+B3:
+ callbr void asm "", "r,!i"(i1 %PredB3) to label %B4 [label %A3]
+
+B4:
+ callbr void asm "", "r,!i"(i1 %PredB4) to label %B1 [label %A2]
+
+A2:
+ br label %A4
+
+A3:
+ br label %A4
+
+A4:
+ %A4.phi = phi i32 [%Y, %A3], [%X.inc, %A2]
+ callbr void asm "", "r,!i"(i1 %PredA4) to label %C [label %A5]
+
+A5:
+ callbr void asm "", "r,!i"(i1 %PredA3) to label %exit [label %A1]
+
+C:
+ br label %exit
+
+exit:
+ %exit.phi = phi i32 [%A4.phi, %A5], [%Z, %C]
+ ret void
+}
+
+; Here, the newly created target block that connects b to r1 needs to be part
+; of the parent loop (the outer loop that b participates in). Otherwise, it
+; would be regarded as an additional entry point into this outer loop.
+define void @nested_callbr_multiple_exits() {
+; CHECK-LABEL: @nested_callbr_multiple_exits(
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[B:%.*]] []
+; CHECK: b:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label [[C:%.*]] [label %b.target.b.target.r1]
+; CHECK: c:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label [[C_TARGET_E:%.*]] [label %b]
+; CHECK: e:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label [[A]] [label %e.target.r2]
+; CHECK: r1:
+; CHECK-NEXT: ret void
+; CHECK: r2:
+; CHECK-NEXT: ret void
+; CHECK: b.target.r1:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: e.target.r2:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[GUARD_R1:%.*]] = phi i1 [ true, [[B_TARGET_R1:%.*]] ], [ false, [[E_TARGET_R2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_R1]], label [[R1:%.*]], label [[R2:%.*]]
+; CHECK: b.target.b.target.r1:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: c.target.e:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD1]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: [[GUARD_B_TARGET_R1:%.*]] = phi i1 [ true, [[B_TARGET_B_TARGET_R1:%.*]] ], [ false, [[C_TARGET_E]] ]
+; CHECK-NEXT: br i1 [[GUARD_B_TARGET_R1]], label [[B_TARGET_R1]], label [[E:%.*]]
+;
+ br label %a
+a:
+ callbr void asm "", ""() to label %b []
+b:
+ callbr void asm "", "!i"() to label %c [label %r1]
+c:
+ callbr void asm "", "!i"() to label %e [label %b]
+e:
+ callbr void asm "", "!i"() to label %a [label %r2]
+r1:
+ ret void
+r2:
+ ret void
+}
diff --git a/llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll b/llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll
index 3e68df3..ffe8026 100644
--- a/llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll
@@ -57,6 +57,60 @@ return:
ret i32 %phi
}
+define i32 @exiting-used-in-exit_callbr(ptr %arg1, ptr %arg2) local_unnamed_addr align 2 {
+; CHECK-LABEL: @exiting-used-in-exit_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A:%.*]] []
+; CHECK: A:
+; CHECK-NEXT: [[MYTMP42:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.return]
+; CHECK: B:
+; CHECK-NEXT: [[MYTMP41:%.*]] = load i32, ptr [[ARG2:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MYTMP41]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %B.target.C]
+; CHECK: C:
+; CHECK-NEXT: [[INC:%.*]] = add i32 [[MYTMP41_MOVED:%.*]], 1
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN:%.*]] []
+; CHECK: return:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[INC]], [[C:%.*]] ], [ [[PHI_MOVED:%.*]], [[LOOP_EXIT_GUARD:%.*]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK: A.target.return:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: B.target.C:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MYTMP41_MOVED]] = phi i32 [ poison, [[A_TARGET_RETURN:%.*]] ], [ [[MYTMP41]], [[B_TARGET_C:%.*]] ]
+; CHECK-NEXT: [[PHI_MOVED]] = phi i32 [ [[MYTMP42]], [[A_TARGET_RETURN]] ], [ poison, [[B_TARGET_C]] ]
+; CHECK-NEXT: [[GUARD_RETURN:%.*]] = phi i1 [ true, [[A_TARGET_RETURN]] ], [ false, [[B_TARGET_C]] ]
+; CHECK-NEXT: br i1 [[GUARD_RETURN]], label [[RETURN]], label [[C]]
+;
+entry:
+ callbr void asm "", ""() to label %A []
+
+A:
+ %mytmp42 = load i32, ptr %arg1, align 4
+ %cmp1 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %return]
+
+B:
+ %mytmp41 = load i32, ptr %arg2, align 4
+ %cmp = icmp slt i32 %mytmp41, 0
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %C]
+
+C:
+ %inc = add i32 %mytmp41, 1
+ callbr void asm "", ""() to label %return []
+
+return:
+ %phi = phi i32 [ %inc, %C ], [ %mytmp42, %A ]
+ ret i32 %phi
+}
+
; Loop consists of A, B and C:
; - A is the header
; - A and C are exiting blocks
@@ -112,6 +166,63 @@ return:
ret i32 0
}
+define i32 @internal-used-in-exit_callbr(ptr %arg1, ptr %arg2) local_unnamed_addr align 2 {
+; CHECK-LABEL: @internal-used-in-exit_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MYTMP42:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A:%.*]] []
+; CHECK: A:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.return]
+; CHECK: B:
+; CHECK-NEXT: [[MYTMP41:%.*]] = load i32, ptr [[ARG2:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[C:%.*]] []
+; CHECK: C:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %C.target.D]
+; CHECK: D:
+; CHECK-NEXT: [[INC:%.*]] = add i32 [[MYTMP41_MOVED:%.*]], 1
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN:%.*]] []
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+; CHECK: A.target.return:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.D:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MYTMP41_MOVED]] = phi i32 [ poison, [[A_TARGET_RETURN:%.*]] ], [ [[MYTMP41]], [[C_TARGET_D:%.*]] ]
+; CHECK-NEXT: [[GUARD_RETURN:%.*]] = phi i1 [ true, [[A_TARGET_RETURN]] ], [ false, [[C_TARGET_D]] ]
+; CHECK-NEXT: br i1 [[GUARD_RETURN]], label [[RETURN]], label [[D:%.*]]
+;
+entry:
+ %mytmp42 = load i32, ptr %arg1, align 4
+ callbr void asm "", ""() to label %A []
+
+A:
+ %cmp1 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %return]
+
+B:
+ %mytmp41 = load i32, ptr %arg2, align 4
+ callbr void asm "", ""() to label %C []
+
+C:
+ %cmp = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %D]
+
+D:
+ %inc = add i32 %mytmp41, 1
+ callbr void asm "", ""() to label %return []
+
+return:
+ ret i32 0
+}
+
; Loop consists of A, B and C:
; - A is the header
; - A and C are exiting blocks
@@ -172,6 +283,68 @@ return:
ret i32 %phi
}
+define i32 @mixed-use-in-exit_callbr(ptr %arg1, ptr %arg2) local_unnamed_addr align 2 {
+; CHECK-LABEL: @mixed-use-in-exit_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MYTMP42:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP2]])
+; CHECK-NEXT: to label [[A:%.*]] [label %return]
+; CHECK: A:
+; CHECK-NEXT: [[MYTMP43:%.*]] = add i32 [[MYTMP42]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.return]
+; CHECK: B:
+; CHECK-NEXT: [[MYTMP41:%.*]] = load i32, ptr [[ARG2:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[C:%.*]] []
+; CHECK: C:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %C.target.D]
+; CHECK: D:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN:%.*]] []
+; CHECK: return:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[MYTMP41_MOVED:%.*]], [[D:%.*]] ], [ [[MYTMP42]], [[ENTRY:%.*]] ], [ [[PHI_MOVED:%.*]], [[LOOP_EXIT_GUARD:%.*]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK: A.target.return:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: C.target.D:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MYTMP41_MOVED]] = phi i32 [ poison, [[A_TARGET_RETURN:%.*]] ], [ [[MYTMP41]], [[C_TARGET_D:%.*]] ]
+; CHECK-NEXT: [[PHI_MOVED]] = phi i32 [ [[MYTMP43]], [[A_TARGET_RETURN]] ], [ poison, [[C_TARGET_D]] ]
+; CHECK-NEXT: [[GUARD_RETURN:%.*]] = phi i1 [ true, [[A_TARGET_RETURN]] ], [ false, [[C_TARGET_D]] ]
+; CHECK-NEXT: br i1 [[GUARD_RETURN]], label [[RETURN]], label [[D]]
+;
+entry:
+ %mytmp42 = load i32, ptr %arg1, align 4
+ %cmp2 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp2) to label %A [label %return]
+
+A:
+ %mytmp43 = add i32 %mytmp42, 1
+ %cmp1 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %return]
+
+B:
+ %mytmp41 = load i32, ptr %arg2, align 4
+ callbr void asm "", ""() to label %C []
+
+C:
+ %cmp = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %D]
+
+D:
+ callbr void asm "", ""() to label %return []
+
+return:
+ %phi = phi i32 [ %mytmp41, %D ], [ %mytmp43, %A ], [ %mytmp42, %entry ]
+ ret i32 %phi
+}
+
; Loop consists of A, B and C:
; - A is the header
; - A and C are exiting blocks
@@ -236,3 +409,66 @@ return:
%phi = phi i32 [ %mytmp41, %D ], [ %mytmp42, %E ]
ret i32 %phi
}
+
+define i32 @phi-via-external-block_callbr(ptr %arg1, ptr %arg2) local_unnamed_addr align 2 {
+; CHECK-LABEL: @phi-via-external-block_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MYTMP42:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A:%.*]] []
+; CHECK: A:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.E]
+; CHECK: B:
+; CHECK-NEXT: [[MYTMP41:%.*]] = load i32, ptr [[ARG2:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[C:%.*]] []
+; CHECK: C:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %C.target.D]
+; CHECK: D:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN:%.*]] []
+; CHECK: E:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN]] []
+; CHECK: return:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[MYTMP41_MOVED:%.*]], [[D:%.*]] ], [ [[MYTMP42]], [[E:%.*]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK: A.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.D:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MYTMP41_MOVED]] = phi i32 [ poison, [[A_TARGET_E:%.*]] ], [ [[MYTMP41]], [[C_TARGET_D:%.*]] ]
+; CHECK-NEXT: [[GUARD_E:%.*]] = phi i1 [ true, [[A_TARGET_E]] ], [ false, [[C_TARGET_D]] ]
+; CHECK-NEXT: br i1 [[GUARD_E]], label [[E]], label [[D]]
+;
+entry:
+ %mytmp42 = load i32, ptr %arg1, align 4
+ callbr void asm "", ""() to label %A []
+
+A:
+ %cmp1 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %E]
+
+B:
+ %mytmp41 = load i32, ptr %arg2, align 4
+ callbr void asm "", ""() to label %C []
+
+C:
+ %cmp = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %D]
+
+D:
+ callbr void asm "", ""() to label %return []
+
+E:
+ callbr void asm "", ""() to label %return []
+
+return:
+ %phi = phi i32 [ %mytmp41, %D ], [ %mytmp42, %E ]
+ ret i32 %phi
+}
diff --git a/llvm/test/Transforms/UnifyLoopExits/undef-phis.ll b/llvm/test/Transforms/UnifyLoopExits/undef-phis.ll
index 05f50fc..e65e254 100644
--- a/llvm/test/Transforms/UnifyLoopExits/undef-phis.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/undef-phis.ll
@@ -56,3 +56,71 @@ mbb5291: ; preds = %mbb4321
store volatile [2 x i32] %i5293, ptr addrspace(5) null, align 4
ret void
}
+
+define fastcc void @undef_phi_callbr(i64 %i5247, i1 %i4530, i1 %i4936.not) {
+; CHECK-LABEL: define fastcc void @undef_phi_callbr(
+; CHECK-SAME: i64 [[I5247:%.*]], i1 [[I4530:%.*]], i1 [[I4936_NOT:%.*]]) {
+; CHECK-NEXT: [[MBB:.*:]]
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label %[[MBB3932:.*]] []
+; CHECK: [[MBB3932]]:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label %[[MBB4454:.*]] []
+; CHECK: [[MBB4321:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[I5247]] to i32
+; CHECK-NEXT: [[I5290:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[I5290]])
+; CHECK-NEXT: to label %[[MBB3932]] [label %mbb4321.target.mbb5291]
+; CHECK: [[MBB4454]]:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[I4530]])
+; CHECK-NEXT: to label %[[MBB4535:.*]] [label %mbb4454.target.mbb4454.target.mbb4531]
+; CHECK: [[MBB4531:.*]]:
+; CHECK-NEXT: ret void
+; CHECK: [[MBB4535]]:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[I4936_NOT]])
+; CHECK-NEXT: to label %[[MBB4535_TARGET_MBB4321:.*]] [label %mbb4454]
+; CHECK: [[MBB5291:.*]]:
+; CHECK-NEXT: [[I5293:%.*]] = insertvalue [2 x i32] zeroinitializer, i32 [[DOTMOVED:%.*]], 1
+; CHECK-NEXT: store volatile [2 x i32] [[I5293]], ptr addrspace(5) null, align 4
+; CHECK-NEXT: ret void
+; CHECK: [[MBB4454_TARGET_MBB4531:.*]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT_GUARD:.*]]
+; CHECK: [[MBB4321_TARGET_MBB5291:.*]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT_GUARD]]
+; CHECK: [[LOOP_EXIT_GUARD]]:
+; CHECK-NEXT: [[DOTMOVED]] = phi i32 [ poison, %[[MBB4454_TARGET_MBB4531]] ], [ [[TMP0]], %[[MBB4321_TARGET_MBB5291]] ]
+; CHECK-NEXT: [[GUARD_MBB4531:%.*]] = phi i1 [ true, %[[MBB4454_TARGET_MBB4531]] ], [ false, %[[MBB4321_TARGET_MBB5291]] ]
+; CHECK-NEXT: br i1 [[GUARD_MBB4531]], label %[[MBB4531]], label %[[MBB5291]]
+; CHECK: [[MBB4454_TARGET_MBB4454_TARGET_MBB4531:.*]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT_GUARD1:.*]]
+; CHECK: [[MBB4535_TARGET_MBB4321]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT_GUARD1]]
+; CHECK: [[LOOP_EXIT_GUARD1]]:
+; CHECK-NEXT: [[GUARD_MBB4454_TARGET_MBB4531:%.*]] = phi i1 [ true, %[[MBB4454_TARGET_MBB4454_TARGET_MBB4531]] ], [ false, %[[MBB4535_TARGET_MBB4321]] ]
+; CHECK-NEXT: br i1 [[GUARD_MBB4454_TARGET_MBB4531]], label %[[MBB4454_TARGET_MBB4531]], label %[[MBB4321]]
+;
+mbb:
+ callbr void asm "", ""() to label %mbb3932 []
+
+mbb3932: ; preds = %mbb4321, %mbb
+ callbr void asm "", ""() to label %mbb4454 []
+
+mbb4321: ; preds = %mbb4535
+ %0 = trunc i64 %i5247 to i32
+ %i5290 = icmp eq i32 %0, 0
+ callbr void asm "", "r,!i"(i1 %i5290) to label %mbb3932 [label %mbb5291]
+
+mbb4454: ; preds = %mbb4535, %mbb3932
+ callbr void asm "", "r,!i"(i1 %i4530) to label %mbb4535 [label %mbb4531]
+
+mbb4531: ; preds = %mbb4454
+ ret void
+
+mbb4535: ; preds = %mbb4454
+ callbr void asm "", "r,!i"(i1 %i4936.not) to label %mbb4321 [label %mbb4454]
+
+mbb5291: ; preds = %mbb4321
+ %i5293 = insertvalue [2 x i32] zeroinitializer, i32 %0, 1
+ store volatile [2 x i32] %i5293, ptr addrspace(5) null, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll b/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll
index 0be13ee..f024106 100644
--- a/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll
+++ b/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments
; RUN: opt -S -passes=print-predicateinfo < %s 2>&1 >/dev/null | FileCheck %s
; FIXME: RenamedOp should be %cmp or %x in all cases here,
@@ -9,25 +9,25 @@ define i32 @test(i32 %x) {
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK: RenamedOp: [[CMP]]
-; CHECK: [[CMP_0:%.*]] = bitcast i1 [[CMP]] to i1
-; CHECK: RenamedOp: [[X]]
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[EXIT1:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB1]],label [[BB2:%.*]]], RenamedOp: [[CMP]] }
+; CHECK-NEXT: [[CMP_0:%.*]] = bitcast i1 [[CMP]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB1]],label [[BB2]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[EXIT1:%.*]]
; CHECK: bb2:
-; CHECK: RenamedOp: [[CMP_0]]
-; CHECK: [[CMP_0_1:%.*]] = bitcast i1 [[CMP_0]] to i1
-; CHECK: RenamedOp: [[X]]
-; CHECK: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32
-; CHECK: RenamedOp: [[X_0]]
-; CHECK: [[X_0_4:%.*]] = bitcast i32 [[X_0]] to i32
-; CHECK-NEXT: br i1 [[CMP_0]], label [[BB3:%.*]], label [[EXIT2:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB2]],label [[BB3:%.*]]], RenamedOp: [[CMP_0]] }
+; CHECK-NEXT: [[CMP_0_1:%.*]] = bitcast i1 [[CMP_0]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB2]],label [[BB3]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB2]],label [[EXIT2:%.*]]], RenamedOp: [[X_0]] }
+; CHECK-NEXT: [[X_0_4:%.*]] = bitcast i32 [[X_0]] to i32
+; CHECK-NEXT: br i1 [[CMP_0]], label [[BB3]], label [[EXIT2]]
; CHECK: bb3:
-; CHECK: RenamedOp: [[X]]
-; CHECK: [[X_0_1_2:%.*]] = bitcast i32 [[X_0_1]] to i32
-; CHECK: RenamedOp: [[X_0_1]]
-; CHECK: [[X_0_1_3:%.*]] = bitcast i32 [[X_0_1]] to i32
-; CHECK-NEXT: br i1 [[CMP_0_1]], label [[EXIT3:%.*]], label [[EXIT4:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB3]],label [[EXIT3:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0_1_2:%.*]] = bitcast i32 [[X_0_1]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB3]],label [[EXIT4:%.*]]], RenamedOp: [[X_0_1]] }
+; CHECK-NEXT: [[X_0_1_3:%.*]] = bitcast i32 [[X_0_1]] to i32
+; CHECK-NEXT: br i1 [[CMP_0_1]], label [[EXIT3]], label [[EXIT4]]
; CHECK: exit1:
; CHECK-NEXT: ret i32 0
; CHECK: exit2:
diff --git a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll
index 256d0d9..42e8ccb 100644
--- a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll
+++ b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments
; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s
@a = external global i32 ; <ptr> [#uses=7]
@@ -98,12 +98,17 @@ define void @test3(i32 %x, i32 %y) {
; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]]
-; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
-; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
-; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK-NEXT: br i1 [[Z]], label [[BOTH_ZERO:%.*]], label [[NOPE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = and i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH_ZERO:%.*]]], RenamedOp: [[XZ]] }
+; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH_ZERO]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH_ZERO]]], RenamedOp: [[YZ]] }
+; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH_ZERO]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: br i1 [[Z]], label [[BOTH_ZERO]], label [[NOPE]]
; CHECK: both_zero:
; CHECK-NEXT: call void @foo(i1 [[XZ_0]])
; CHECK-NEXT: call void @foo(i1 [[YZ_0]])
@@ -133,10 +138,11 @@ define void @test4(i1 %b, i32 %x) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]]
; CHECK: sw:
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X:%.*]] to i32
+; CHECK-NEXT: ; switch predicate info { CaseValue: i32 1 Edge: [label [[SW]],label [[CASE1:%.*]]], RenamedOp: [[X:%.*]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT:%.*]] [
; CHECK-NEXT: i32 0, label [[CASE0:%.*]]
-; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
+; CHECK-NEXT: i32 1, label [[CASE1]]
; CHECK-NEXT: i32 2, label [[CASE0]]
; CHECK-NEXT: i32 3, label [[CASE3]]
; CHECK-NEXT: i32 4, label [[DEFAULT]]
@@ -180,11 +186,15 @@ case3:
define i1 @test5(i32 %x, i32 %y) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[X_1:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK: [[Y_1:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], [[Y]] Edge: [label [[TMP0:%.*]],label [[SAME:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_1:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[SAME]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_1:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME]], label [[DIFFERENT]]
; CHECK: same:
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X_0]], [[Y_0]]
; CHECK-NEXT: ret i1 [[CMP2]]
@@ -253,11 +263,15 @@ different:
define i1 @test7(i32 %x, i32 %y) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[X_1:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK: [[Y_1:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp sgt i32 [[X]], [[Y]] Edge: [label [[TMP0:%.*]],label [[SAME:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp sgt i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_1:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp sgt i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[SAME]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp sgt i32 [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_1:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME]], label [[DIFFERENT]]
; CHECK: same:
; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X_0]], [[Y_0]]
; CHECK-NEXT: ret i1 [[CMP2]]
@@ -280,11 +294,15 @@ different:
define i1 @test7_fp(float %x, float %y) {
; CHECK-LABEL: @test7_fp(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
-; CHECK: [[X_0:%.*]] = bitcast float [[X]] to float
-; CHECK: [[X_1:%.*]] = bitcast float [[X]] to float
-; CHECK: [[Y_0:%.*]] = bitcast float [[Y]] to float
-; CHECK: [[Y_1:%.*]] = bitcast float [[Y]] to float
-; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp ogt float [[X]], [[Y]] Edge: [label [[TMP0:%.*]],label [[SAME:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast float [[X]] to float
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp ogt float [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_1:%.*]] = bitcast float [[X]] to float
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp ogt float [[X]], [[Y]] Edge: [label [[TMP0]],label [[SAME]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast float [[Y]] to float
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp ogt float [[X]], [[Y]] Edge: [label [[TMP0]],label [[DIFFERENT]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_1:%.*]] = bitcast float [[Y]] to float
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME]], label [[DIFFERENT]]
; CHECK: same:
; CHECK-NEXT: [[CMP2:%.*]] = fcmp ule float [[X_0]], [[Y_0]]
; CHECK-NEXT: ret i1 [[CMP2]]
@@ -353,9 +371,11 @@ different:
define i32 @test9(i32 %i, i32 %j) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]]
-; CHECK: [[I_0:%.*]] = bitcast i32 [[I]] to i32
-; CHECK: [[J_0:%.*]] = bitcast i32 [[J]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[I]], [[J]] Edge: [label [[TMP0:%.*]],label [[COND_TRUE:%.*]]], RenamedOp: [[I]] }
+; CHECK-NEXT: [[I_0:%.*]] = bitcast i32 [[I]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[I]], [[J]] Edge: [label [[TMP0]],label [[COND_TRUE]]], RenamedOp: [[J]] }
+; CHECK-NEXT: [[J_0:%.*]] = bitcast i32 [[J]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE]], label [[RET:%.*]]
; CHECK: cond_true:
; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]]
; CHECK-NEXT: ret i32 [[DIFF]]
@@ -376,9 +396,11 @@ ret:
define i32 @test10(i32 %j, i32 %i) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]]
-; CHECK: [[I_0:%.*]] = bitcast i32 [[I]] to i32
-; CHECK: [[J_0:%.*]] = bitcast i32 [[J]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[I]], [[J]] Edge: [label [[TMP0:%.*]],label [[COND_TRUE:%.*]]], RenamedOp: [[I]] }
+; CHECK-NEXT: [[I_0:%.*]] = bitcast i32 [[I]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[I]], [[J]] Edge: [label [[TMP0]],label [[COND_TRUE]]], RenamedOp: [[J]] }
+; CHECK-NEXT: [[J_0:%.*]] = bitcast i32 [[J]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE]], label [[RET:%.*]]
; CHECK: cond_true:
; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]]
; CHECK-NEXT: ret i32 [[DIFF]]
@@ -403,15 +425,18 @@ define i32 @test11(i32 %x) {
; CHECK-NEXT: [[V0:%.*]] = call i32 @yogibar()
; CHECK-NEXT: [[V1:%.*]] = call i32 @yogibar()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V0]], [[V1]]
-; CHECK: [[V0_0:%.*]] = bitcast i32 [[V0]] to i32
-; CHECK: [[V1_0:%.*]] = bitcast i32 [[V1]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[NEXT:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[V0]], [[V1]] Edge: [label [[TMP0:%.*]],label [[NEXT:%.*]]], RenamedOp: [[V0]] }
+; CHECK-NEXT: [[V0_0:%.*]] = bitcast i32 [[V0]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[V0]], [[V1]] Edge: [label [[TMP0]],label [[COND_TRUE:%.*]]], RenamedOp: [[V1]] }
+; CHECK-NEXT: [[V1_0:%.*]] = bitcast i32 [[V1]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE]], label [[NEXT]]
; CHECK: cond_true:
; CHECK-NEXT: ret i32 [[V1_0]]
; CHECK: next:
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X:%.*]], [[V0_0]]
-; CHECK: [[V0_0_1:%.*]] = bitcast i32 [[V0_0]] to i32
-; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE2:%.*]], label [[NEXT2:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP2]] = icmp eq i32 [[X]], [[V0_0]] Edge: [label [[NEXT]],label [[COND_TRUE2:%.*]]], RenamedOp: [[V0_0]] }
+; CHECK-NEXT: [[V0_0_1:%.*]] = bitcast i32 [[V0_0]] to i32
+; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE2]], label [[NEXT2:%.*]]
; CHECK: cond_true2:
; CHECK-NEXT: ret i32 [[V0_0_1]]
; CHECK: next2:
@@ -439,9 +464,11 @@ next2:
define i32 @test12(i32 %x) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[X_1:%.*]] = bitcast i32 [[X]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0:%.*]],label [[COND_TRUE:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[COND_FALSE:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_1:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE]], label [[COND_FALSE]]
; CHECK: cond_true:
; CHECK-NEXT: br label [[RET:%.*]]
; CHECK: cond_false:
diff --git a/llvm/test/Transforms/Util/PredicateInfo/diamond.ll b/llvm/test/Transforms/Util/PredicateInfo/diamond.ll
index ac2c9a1..06c02d6 100644
--- a/llvm/test/Transforms/Util/PredicateInfo/diamond.ll
+++ b/llvm/test/Transforms/Util/PredicateInfo/diamond.ll
@@ -1,16 +1,18 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments
+; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s
define i1 @f(i32 %x, i1 %y) {
; CHECK-LABEL: @f(
; CHECK-NEXT: br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp sge i32 [[X]], 0 Edge: [label [[BB0]],label [[BB2:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[BB3:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[X2:%.*]] = add nuw nsw i32 [[X]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X2]], 2
-; CHECK: [[X2_0:%.*]] = bitcast i32 [[X2]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP2]] = icmp sge i32 [[X2]], 2 Edge: [label [[BB1]],label [[BB2]]], RenamedOp: [[X2]] }
+; CHECK-NEXT: [[X2_0:%.*]] = bitcast i32 [[X2]] to i32
; CHECK-NEXT: br i1 [[CMP2]], label [[BB2]], label [[BB3]]
; CHECK: bb2:
; CHECK-NEXT: [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ]
@@ -38,12 +40,14 @@ define i1 @g(i32 %x, i1 %y) {
; CHECK-NEXT: br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[BB3:%.*]], label [[BB2:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp sge i32 [[X]], 0 Edge: [label [[BB0]],label [[BB2:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[BB3:%.*]], label [[BB2]]
; CHECK: bb1:
; CHECK-NEXT: [[X2:%.*]] = add nuw nsw i32 [[X]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X2]], 2
-; CHECK: [[X2_0:%.*]] = bitcast i32 [[X2]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP2]] = icmp sge i32 [[X2]], 2 Edge: [label [[BB1]],label [[BB2]]], RenamedOp: [[X2]] }
+; CHECK-NEXT: [[X2_0:%.*]] = bitcast i32 [[X2]] to i32
; CHECK-NEXT: br i1 [[CMP2]], label [[BB3]], label [[BB2]]
; CHECK: bb2:
; CHECK-NEXT: [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ]
diff --git a/llvm/test/Transforms/Util/PredicateInfo/edge.ll b/llvm/test/Transforms/Util/PredicateInfo/edge.ll
index ef757f3..9138326 100644
--- a/llvm/test/Transforms/Util/PredicateInfo/edge.ll
+++ b/llvm/test/Transforms/Util/PredicateInfo/edge.ll
@@ -1,16 +1,17 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments
+; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s
define i32 @f1(i32 %x) {
; CHECK-LABEL: @f1(
; CHECK-NEXT: bb0:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[BB1:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = icmp eq i32 [[X]], 0 Edge: [label [[BB0:%.*]],label [[BB2:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0:%.*]] ], [ 0, [[BB1]] ]
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ 0, [[BB1]] ]
; CHECK-NEXT: [[FOO:%.*]] = add i32 [[COND]], [[X]]
; CHECK-NEXT: ret i32 [[FOO]]
;
@@ -29,12 +30,13 @@ define i32 @f2(i32 %x) {
; CHECK-LABEL: @f2(
; CHECK-NEXT: bb0:
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK-NEXT: br i1 [[CMP]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = icmp ne i32 [[X]], 0 Edge: [label [[BB0:%.*]],label [[BB2:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: br i1 [[CMP]], label [[BB1:%.*]], label [[BB2]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0:%.*]] ], [ 0, [[BB1]] ]
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ 0, [[BB1]] ]
; CHECK-NEXT: [[FOO:%.*]] = add i32 [[COND]], [[X]]
; CHECK-NEXT: ret i32 [[FOO]]
;
@@ -52,14 +54,15 @@ bb2:
define i32 @f3(i32 %x) {
; CHECK-LABEL: @f3(
; CHECK-NEXT: bb0:
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X:%.*]] to i32
+; CHECK-NEXT: ; switch predicate info { CaseValue: i32 0 Edge: [label [[BB0:%.*]],label [[BB2:%.*]]], RenamedOp: [[X:%.*]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
; CHECK-NEXT: switch i32 [[X]], label [[BB1:%.*]] [
-; CHECK-NEXT: i32 0, label [[BB2:%.*]]
+; CHECK-NEXT: i32 0, label [[BB2]]
; CHECK-NEXT: ]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0:%.*]] ], [ 0, [[BB1]] ]
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ 0, [[BB1]] ]
; CHECK-NEXT: [[FOO:%.*]] = add i32 [[COND]], [[X]]
; CHECK-NEXT: ret i32 [[FOO]]
;
@@ -78,13 +81,14 @@ define double @fcmp_oeq_not_zero(double %x, double %y) {
; CHECK-LABEL: @fcmp_oeq_not_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[Y:%.*]], 2.000000e+00
-; CHECK: [[Y_0:%.*]] = bitcast double [[Y]] to double
-; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp oeq double [[Y]], 2.000000e+00 Edge: [label [[ENTRY:%.*]],label [[IF:%.*]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast double [[Y]] to double
+; CHECK-NEXT: br i1 [[CMP]], label [[IF]], label [[RETURN:%.*]]
; CHECK: if:
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Y_0]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY]] ]
; CHECK-NEXT: ret double [[RETVAL]]
;
entry:
@@ -105,13 +109,14 @@ define double @fcmp_une_not_zero(double %x, double %y) {
; CHECK-LABEL: @fcmp_une_not_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[Y:%.*]], 2.000000e+00
-; CHECK: [[Y_0:%.*]] = bitcast double [[Y]] to double
-; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp une double [[Y]], 2.000000e+00 Edge: [label [[ENTRY:%.*]],label [[ELSE:%.*]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast double [[Y]] to double
+; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Y_0]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY]] ]
; CHECK-NEXT: ret double [[RETVAL]]
;
entry:
@@ -132,13 +137,14 @@ define double @fcmp_oeq_zero(double %x, double %y) {
; CHECK-LABEL: @fcmp_oeq_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[Y:%.*]], 0.000000e+00
-; CHECK: [[Y_0:%.*]] = bitcast double [[Y]] to double
-; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp oeq double [[Y]], 0.000000e+00 Edge: [label [[ENTRY:%.*]],label [[IF:%.*]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast double [[Y]] to double
+; CHECK-NEXT: br i1 [[CMP]], label [[IF]], label [[RETURN:%.*]]
; CHECK: if:
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Y_0]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY]] ]
; CHECK-NEXT: ret double [[RETVAL]]
;
entry:
@@ -159,13 +165,14 @@ define double @fcmp_une_zero(double %x, double %y) {
; CHECK-LABEL: @fcmp_une_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[Y:%.*]], -0.000000e+00
-; CHECK: [[Y_0:%.*]] = bitcast double [[Y]] to double
-; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp une double [[Y]], -0.000000e+00 Edge: [label [[ENTRY:%.*]],label [[ELSE:%.*]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast double [[Y]] to double
+; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Y_0]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY]] ]
; CHECK-NEXT: ret double [[RETVAL]]
;
entry:
@@ -188,13 +195,14 @@ define double @fcmp_oeq_maybe_zero(double %x, double %y, double %z1, double %z2)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[Z:%.*]] = fadd double [[Z1:%.*]], [[Z2:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[Y:%.*]], [[Z]]
-; CHECK: [[Z_0:%.*]] = bitcast double [[Z]] to double
-; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[CMP]] = fcmp oeq double [[Y]], [[Z]] Edge: [label [[ENTRY:%.*]],label [[IF:%.*]]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[Z_0:%.*]] = bitcast double [[Z]] to double
+; CHECK-NEXT: br i1 [[CMP]], label [[IF]], label [[RETURN:%.*]]
; CHECK: if:
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Z_0]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[IF]] ], [ [[X]], [[ENTRY]] ]
; CHECK-NEXT: ret double [[RETVAL]]
;
entry:
@@ -217,13 +225,14 @@ define double @fcmp_une_maybe_zero(double %x, double %y, double %z1, double %z2)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[Z:%.*]] = fadd double [[Z1:%.*]], [[Z2:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[Y:%.*]], [[Z]]
-; CHECK: [[Z_0:%.*]] = bitcast double [[Z]] to double
-; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[CMP]] = fcmp une double [[Y]], [[Z]] Edge: [label [[ENTRY:%.*]],label [[ELSE:%.*]]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[Z_0:%.*]] = bitcast double [[Z]] to double
+; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[X:%.*]], [[Z_0]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi double [ [[DIV]], [[ELSE]] ], [ [[X]], [[ENTRY]] ]
; CHECK-NEXT: ret double [[RETVAL]]
;
entry:
diff --git a/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll b/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll
index 36eaf6e..4762d37 100644
--- a/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll
+++ b/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments
+; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s
; Don't insert predicate info for conditions with a single target.
@a = global i32 1, align 4
@d = common global i32 0, align 4
@@ -12,22 +12,27 @@ define i32 @main() {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @d, align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP13:%.*]]
-; CHECK: [[TMP4:%.*]] = load i32, ptr @a, align 4
+; CHECK: 3:
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @a, align 4
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr @c, align 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1
; CHECK-NEXT: br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP9:%.*]]
-; CHECK: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9]], label [[TMP9]]
-; CHECK: [[DOT0:%.*]] = phi i32 [ [[TMP4]], [[TMP7]] ], [ [[TMP4]], [[TMP7]] ], [ [[DOT1:%.*]], [[TMP13]] ], [ [[TMP4]], [[TMP3]] ]
+; CHECK: 9:
+; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP4]], [[TMP7]] ], [ [[TMP4]], [[TMP7]] ], [ [[DOT1:%.*]], [[TMP13]] ], [ [[TMP4]], [[TMP3]] ]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP11:%.*]] = sdiv i32 [[TMP10]], [[DOT0]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 0
; CHECK-NEXT: br i1 [[TMP12]], label [[TMP13]], label [[TMP13]]
-; CHECK: [[DOT1]] = phi i32 [ [[DOT0]], [[TMP9]] ], [ [[DOT0]], [[TMP9]] ], [ undef, [[TMP0:%.*]] ]
+; CHECK: 13:
+; CHECK-NEXT: [[DOT1]] = phi i32 [ [[DOT0]], [[TMP9]] ], [ [[DOT0]], [[TMP9]] ], [ undef, [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr @e, align 4
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP9]]
-; CHECK: ret i32 0
+; CHECK: 16:
+; CHECK-NEXT: ret i32 0
;
%1 = load i32, ptr @d, align 4
%2 = icmp eq i32 %1, 0
diff --git a/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll b/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll
index bc1d39f..e4fd4cc 100644
--- a/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll
+++ b/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments
+; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s
; Don't insert predicate info for conditions with a single target.
@a = global i32 6, align 4
@c = global i32 -1, align 4
@@ -13,26 +13,32 @@ define i32 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: store i32 6, ptr @e, align 4
; CHECK-NEXT: br label [[TMP1:%.*]]
-; CHECK: [[TMP2:%.*]] = load i32, ptr @d, align 4
+; CHECK: 1:
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @d, align 4
; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [6 x i32], ptr @b, i64 0, i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = call i32 (ptr, ...) @printf(ptr @.str, i32 [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @a, align 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[TMP8]], label %thread-pre-split, label [[TMP9:%.*]]
-; CHECK: [[TMP10:%.*]] = load i32, ptr @e, align 4
+; CHECK-NEXT: br i1 [[TMP8]], label [[THREAD_PRE_SPLIT:%.*]], label [[TMP9:%.*]]
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr @e, align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP12]]
; CHECK: thread-pre-split:
; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr @e, align 4
; CHECK-NEXT: br label [[TMP12]]
-; CHECK: [[TMP13:%.*]] = phi i32 [ [[DOTPR]], %thread-pre-split ], [ [[TMP10]], [[TMP9]] ], [ [[TMP10]], [[TMP9]] ]
+; CHECK: 12:
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[DOTPR]], [[THREAD_PRE_SPLIT]] ], [ [[TMP10]], [[TMP9]] ], [ [[TMP10]], [[TMP9]] ]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
; CHECK-NEXT: br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP15]]
-; CHECK: br i1 [[TMP14]], label [[TMP16:%.*]], label [[TMP17:%.*]]
-; CHECK: br label [[TMP17]]
-; CHECK: [[DOT0:%.*]] = phi i32 [ 1, [[TMP16]] ], [ -1, [[TMP15]] ]
+; CHECK: 15:
+; CHECK-NEXT: br i1 [[TMP14]], label [[TMP16:%.*]], label [[TMP17:%.*]]
+; CHECK: 16:
+; CHECK-NEXT: br label [[TMP17]]
+; CHECK: 17:
+; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 1, [[TMP16]] ], [ -1, [[TMP15]] ]
; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[DOT0]], 8693
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr @c, align 4
; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
@@ -40,7 +46,8 @@ define i32 @main() {
; CHECK-NEXT: store i32 [[TMP21]], ptr @d, align 4
; CHECK-NEXT: [[TMP22:%.*]] = icmp slt i32 [[TMP20]], -2
; CHECK-NEXT: br i1 [[TMP22]], label [[TMP1]], label [[TMP23:%.*]]
-; CHECK: ret i32 0
+; CHECK: 23:
+; CHECK-NEXT: ret i32 0
;
store i32 6, ptr @e, align 4
br label %1
diff --git a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll
index cc1dc4e..d29aadd 100644
--- a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll
+++ b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments
; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s
declare void @foo(i1)
@@ -10,12 +10,17 @@ define void @test_or(i32 %x, i32 %y) {
; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[Z:%.*]] = or i1 [[XZ]], [[YZ]]
-; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
-; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
-; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = or i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NEITHER:%.*]]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[XZ]] }
+; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[YZ]] }
+; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER]]
; CHECK: oneof:
; CHECK-NEXT: call void @foo(i1 [[XZ]])
; CHECK-NEXT: call void @foo(i1 [[YZ]])
@@ -55,12 +60,17 @@ define void @test_or_logical(i32 %x, i32 %y) {
; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[Z:%.*]] = select i1 [[XZ]], i1 true, i1 [[YZ]]
-; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
-; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
-; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = select i1 [[XZ]], i1 true, i1 [[YZ]] Edge: [label [[TMP0:%.*]],label [[NEITHER:%.*]]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[XZ]] }
+; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[YZ]] }
+; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[NEITHER]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER]]
; CHECK: oneof:
; CHECK-NEXT: call void @foo(i1 [[XZ]])
; CHECK-NEXT: call void @foo(i1 [[YZ]])
@@ -100,12 +110,17 @@ define void @test_and(i32 %x, i32 %y) {
; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]]
-; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
-; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
-; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = and i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH:%.*]]], RenamedOp: [[XZ]] }
+; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[YZ]] }
+; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: br i1 [[Z]], label [[BOTH]], label [[NOPE]]
; CHECK: both:
; CHECK-NEXT: call void @foo(i1 [[XZ_0]])
; CHECK-NEXT: call void @foo(i1 [[YZ_0]])
@@ -145,12 +160,17 @@ define void @test_and_logical(i32 %x, i32 %y) {
; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[Z:%.*]] = select i1 [[XZ]], i1 [[YZ]], i1 false
-; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
-; CHECK: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
-; CHECK: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = select i1 [[XZ]], i1 [[YZ]], i1 false Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH:%.*]]], RenamedOp: [[XZ]] }
+; CHECK-NEXT: [[XZ_0:%.*]] = bitcast i1 [[XZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[YZ]] }
+; CHECK-NEXT: [[YZ_0:%.*]] = bitcast i1 [[YZ]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[Y_0:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: br i1 [[Z]], label [[BOTH]], label [[NOPE]]
; CHECK: both:
; CHECK-NEXT: call void @foo(i1 [[XZ_0]])
; CHECK-NEXT: call void @foo(i1 [[YZ_0]])
@@ -190,12 +210,17 @@ define void @testandsame(i32 %x, i32 %y) {
; CHECK-NEXT: [[XGT:%.*]] = icmp sgt i32 [[X:%.*]], 0
; CHECK-NEXT: [[XLT:%.*]] = icmp slt i32 [[X]], 100
; CHECK-NEXT: [[Z:%.*]] = and i1 [[XGT]], [[XLT]]
-; CHECK: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
-; CHECK: [[XGT_0:%.*]] = bitcast i1 [[XGT]] to i1
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32
-; CHECK: [[XLT_0:%.*]] = bitcast i1 [[XLT]] to i1
-; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = and i1 [[XGT]], [[XLT]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[Z_0:%.*]] = bitcast i1 [[Z]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XGT]] = icmp sgt i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH:%.*]]], RenamedOp: [[XGT]] }
+; CHECK-NEXT: [[XGT_0:%.*]] = bitcast i1 [[XGT]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XGT]] = icmp sgt i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XLT]] = icmp slt i32 [[X]], 100 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XLT]] = icmp slt i32 [[X]], 100 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[XLT]] }
+; CHECK-NEXT: [[XLT_0:%.*]] = bitcast i1 [[XLT]] to i1
+; CHECK-NEXT: br i1 [[Z]], label [[BOTH]], label [[NOPE]]
; CHECK: both:
; CHECK-NEXT: call void @foo(i1 [[XGT_0]])
; CHECK-NEXT: call void @foo(i1 [[XLT_0]])
@@ -229,17 +254,27 @@ define void @testandassume(i32 %x, i32 %y) {
; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]]
; CHECK-NEXT: call void @llvm.assume(i1 [[Z]])
-; CHECK: [[TMP1:%.*]] = bitcast i32 [[Y]] to i32
-; CHECK: [[TMP2:%.*]] = bitcast i1 [[YZ]] to i1
-; CHECK: [[TMP3:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[TMP4:%.*]] = bitcast i1 [[XZ]] to i1
-; CHECK: [[TMP5:%.*]] = bitcast i1 [[Z]] to i1
-; CHECK: [[DOT0:%.*]] = bitcast i1 [[TMP5]] to i1
-; CHECK: [[DOT01:%.*]] = bitcast i1 [[TMP4]] to i1
-; CHECK: [[DOT02:%.*]] = bitcast i32 [[TMP3]] to i32
-; CHECK: [[DOT03:%.*]] = bitcast i1 [[TMP2]] to i1
-; CHECK: [[DOT04:%.*]] = bitcast i32 [[TMP1]] to i32
-; CHECK-NEXT: br i1 [[TMP5]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK-NEXT: ; assume predicate info { Comparison: [[YZ]] = icmp eq i32 [[Y]], 0, RenamedOp: [[Y]] }
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[Y]] to i32
+; CHECK-NEXT: ; assume predicate info { Comparison: [[YZ]] = icmp eq i32 [[Y]], 0, RenamedOp: [[YZ]] }
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[YZ]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[XZ]] = icmp eq i32 [[X]], 0, RenamedOp: [[X]] }
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; assume predicate info { Comparison: [[XZ]] = icmp eq i32 [[X]], 0, RenamedOp: [[XZ]] }
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i1 [[XZ]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[Z]] = and i1 [[XZ]], [[YZ]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i1 [[Z]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = and i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[TMP5]] }
+; CHECK-NEXT: [[DOT0:%.*]] = bitcast i1 [[TMP5]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH:%.*]]], RenamedOp: [[XZ]] }
+; CHECK-NEXT: [[DOT01:%.*]] = bitcast i1 [[TMP4]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XZ]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[DOT02:%.*]] = bitcast i32 [[TMP3]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[YZ]] }
+; CHECK-NEXT: [[DOT03:%.*]] = bitcast i1 [[TMP2]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[YZ]] = icmp eq i32 [[Y]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[Y]] }
+; CHECK-NEXT: [[DOT04:%.*]] = bitcast i32 [[TMP1]] to i32
+; CHECK-NEXT: br i1 [[TMP5]], label [[BOTH]], label [[NOPE]]
; CHECK: both:
; CHECK-NEXT: call void @foo(i1 [[DOT01]])
; CHECK-NEXT: call void @foo(i1 [[DOT03]])
@@ -274,9 +309,11 @@ define void @testorassume(i32 %x, i32 %y) {
; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[Z:%.*]] = or i1 [[XZ]], [[YZ]]
; CHECK-NEXT: call void @llvm.assume(i1 [[Z]])
-; CHECK: [[TMP1:%.*]] = bitcast i1 [[Z]] to i1
-; CHECK: [[DOT0:%.*]] = bitcast i1 [[TMP1]] to i1
-; CHECK-NEXT: br i1 [[TMP1]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK-NEXT: ; assume predicate info { Comparison: [[Z]] = or i1 [[XZ]], [[YZ]], RenamedOp: [[Z]] }
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[Z]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[Z]] = or i1 [[XZ]], [[YZ]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[TMP1]] }
+; CHECK-NEXT: [[DOT0:%.*]] = bitcast i1 [[TMP1]] to i1
+; CHECK-NEXT: br i1 [[TMP1]], label [[BOTH:%.*]], label [[NOPE]]
; CHECK: both:
; CHECK-NEXT: call void @foo(i1 [[XZ]])
; CHECK-NEXT: call void @foo(i1 [[YZ]])
@@ -307,12 +344,17 @@ define void @test_and_one_unknown_cond(i32 %x, i1 %c1) {
; CHECK-LABEL: @test_and_one_unknown_cond(
; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[A:%.*]] = and i1 [[C1:%.*]], [[C2]]
-; CHECK: [[A_0:%.*]] = bitcast i1 [[A]] to i1
-; CHECK: [[A_1:%.*]] = bitcast i1 [[A]] to i1
-; CHECK: [[C1_0:%.*]] = bitcast i1 [[C1]] to i1
-; CHECK: [[C2_0:%.*]] = bitcast i1 [[C2]] to i1
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK-NEXT: br i1 [[A]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A]] = and i1 [[C1]], [[C2]] Edge: [label [[TMP0:%.*]],label [[BOTH:%.*]]], RenamedOp: [[A]] }
+; CHECK-NEXT: [[A_0:%.*]] = bitcast i1 [[A]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A]] = and i1 [[C1]], [[C2]] Edge: [label [[TMP0]],label [[NOPE:%.*]]], RenamedOp: [[A]] }
+; CHECK-NEXT: [[A_1:%.*]] = bitcast i1 [[A]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[C1]] Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[C1]] }
+; CHECK-NEXT: [[C1_0:%.*]] = bitcast i1 [[C1]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[C2]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[C2]] }
+; CHECK-NEXT: [[C2_0:%.*]] = bitcast i1 [[C2]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[C2]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: br i1 [[A]], label [[BOTH]], label [[NOPE]]
; CHECK: both:
; CHECK-NEXT: call void @bar(i32 [[X_0]])
; CHECK-NEXT: call void @foo(i1 [[C1_0]])
@@ -349,12 +391,17 @@ define void @test_or_one_unknown_cond(i32 %x, i1 %c1) {
; CHECK-LABEL: @test_or_one_unknown_cond(
; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[A:%.*]] = or i1 [[C1:%.*]], [[C2]]
-; CHECK: [[A_0:%.*]] = bitcast i1 [[A]] to i1
-; CHECK: [[A_1:%.*]] = bitcast i1 [[A]] to i1
-; CHECK: [[C1_0:%.*]] = bitcast i1 [[C1]] to i1
-; CHECK: [[C2_0:%.*]] = bitcast i1 [[C2]] to i1
-; CHECK: [[X_0:%.*]] = bitcast i32 [[X]] to i32
-; CHECK-NEXT: br i1 [[A]], label [[NOPE:%.*]], label [[BOTH_INVERTED:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A]] = or i1 [[C1]], [[C2]] Edge: [label [[TMP0:%.*]],label [[NOPE:%.*]]], RenamedOp: [[A]] }
+; CHECK-NEXT: [[A_0:%.*]] = bitcast i1 [[A]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A]] = or i1 [[C1]], [[C2]] Edge: [label [[TMP0]],label [[BOTH_INVERTED:%.*]]], RenamedOp: [[A]] }
+; CHECK-NEXT: [[A_1:%.*]] = bitcast i1 [[A]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison:i1 [[C1]] Edge: [label [[TMP0]],label [[BOTH_INVERTED]]], RenamedOp: [[C1]] }
+; CHECK-NEXT: [[C1_0:%.*]] = bitcast i1 [[C1]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[C2]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH_INVERTED]]], RenamedOp: [[C2]] }
+; CHECK-NEXT: [[C2_0:%.*]] = bitcast i1 [[C2]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[C2]] = icmp eq i32 [[X]], 0 Edge: [label [[TMP0]],label [[BOTH_INVERTED]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: br i1 [[A]], label [[NOPE]], label [[BOTH_INVERTED]]
; CHECK: both_inverted:
; CHECK-NEXT: call void @bar(i32 [[X_0]])
; CHECK-NEXT: call void @foo(i1 [[C1_0]])
@@ -391,13 +438,19 @@ define void @test_and_chain(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @test_and_chain(
; CHECK-NEXT: [[AND1:%.*]] = and i1 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C:%.*]]
-; CHECK: [[AND2_0:%.*]] = bitcast i1 [[AND2]] to i1
-; CHECK: [[AND2_1:%.*]] = bitcast i1 [[AND2]] to i1
-; CHECK: [[AND1_0:%.*]] = bitcast i1 [[AND1]] to i1
-; CHECK: [[A_0:%.*]] = bitcast i1 [[A]] to i1
-; CHECK: [[B_0:%.*]] = bitcast i1 [[B]] to i1
-; CHECK: [[C_0:%.*]] = bitcast i1 [[C]] to i1
-; CHECK-NEXT: br i1 [[AND2]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[AND2]] = and i1 [[AND1]], [[C]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[AND2]] }
+; CHECK-NEXT: [[AND2_0:%.*]] = bitcast i1 [[AND2]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[AND2]] = and i1 [[AND1]], [[C]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[AND2]] }
+; CHECK-NEXT: [[AND2_1:%.*]] = bitcast i1 [[AND2]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[AND1]] = and i1 [[A]], [[B]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[AND1]] }
+; CHECK-NEXT: [[AND1_0:%.*]] = bitcast i1 [[AND1]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[A]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A]] }
+; CHECK-NEXT: [[A_0:%.*]] = bitcast i1 [[A]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[B]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[B]] }
+; CHECK-NEXT: [[B_0:%.*]] = bitcast i1 [[B]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[C]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[C]] }
+; CHECK-NEXT: [[C_0:%.*]] = bitcast i1 [[C]] to i1
+; CHECK-NEXT: br i1 [[AND2]], label [[IF]], label [[ELSE]]
; CHECK: if:
; CHECK-NEXT: call void @foo(i1 [[A_0]])
; CHECK-NEXT: call void @foo(i1 [[B_0]])
@@ -438,13 +491,19 @@ define void @test_or_chain(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @test_or_chain(
; CHECK-NEXT: [[OR1:%.*]] = or i1 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C:%.*]]
-; CHECK: [[OR2_0:%.*]] = bitcast i1 [[OR2]] to i1
-; CHECK: [[OR2_1:%.*]] = bitcast i1 [[OR2]] to i1
-; CHECK: [[OR1_0:%.*]] = bitcast i1 [[OR1]] to i1
-; CHECK: [[A_0:%.*]] = bitcast i1 [[A]] to i1
-; CHECK: [[B_0:%.*]] = bitcast i1 [[B]] to i1
-; CHECK: [[C_0:%.*]] = bitcast i1 [[C]] to i1
-; CHECK-NEXT: br i1 [[OR2]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[OR2]] = or i1 [[OR1]], [[C]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[OR2]] }
+; CHECK-NEXT: [[OR2_0:%.*]] = bitcast i1 [[OR2]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[OR2]] = or i1 [[OR1]], [[C]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[OR2]] }
+; CHECK-NEXT: [[OR2_1:%.*]] = bitcast i1 [[OR2]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[OR1]] = or i1 [[A]], [[B]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[OR1]] }
+; CHECK-NEXT: [[OR1_0:%.*]] = bitcast i1 [[OR1]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison:i1 [[A]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A]] }
+; CHECK-NEXT: [[A_0:%.*]] = bitcast i1 [[A]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison:i1 [[B]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[B]] }
+; CHECK-NEXT: [[B_0:%.*]] = bitcast i1 [[B]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison:i1 [[C]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[C]] }
+; CHECK-NEXT: [[C_0:%.*]] = bitcast i1 [[C]] to i1
+; CHECK-NEXT: br i1 [[OR2]], label [[IF]], label [[ELSE]]
; CHECK: if:
; CHECK-NEXT: call void @foo(i1 [[A]])
; CHECK-NEXT: call void @foo(i1 [[B]])
@@ -485,11 +544,15 @@ define void @test_and_or_mixed(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @test_and_or_mixed(
; CHECK-NEXT: [[OR:%.*]] = or i1 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[AND:%.*]] = and i1 [[OR]], [[C:%.*]]
-; CHECK: [[AND_0:%.*]] = bitcast i1 [[AND]] to i1
-; CHECK: [[AND_1:%.*]] = bitcast i1 [[AND]] to i1
-; CHECK: [[OR_0:%.*]] = bitcast i1 [[OR]] to i1
-; CHECK: [[C_0:%.*]] = bitcast i1 [[C]] to i1
-; CHECK-NEXT: br i1 [[AND]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[AND]] = and i1 [[OR]], [[C]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[AND]] }
+; CHECK-NEXT: [[AND_0:%.*]] = bitcast i1 [[AND]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[AND]] = and i1 [[OR]], [[C]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[AND]] }
+; CHECK-NEXT: [[AND_1:%.*]] = bitcast i1 [[AND]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[OR]] = or i1 [[A]], [[B]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[OR]] }
+; CHECK-NEXT: [[OR_0:%.*]] = bitcast i1 [[OR]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison:i1 [[C]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[C]] }
+; CHECK-NEXT: [[C_0:%.*]] = bitcast i1 [[C]] to i1
+; CHECK-NEXT: br i1 [[AND]], label [[IF]], label [[ELSE]]
; CHECK: if:
; CHECK-NEXT: call void @foo(i1 [[A]])
; CHECK-NEXT: call void @foo(i1 [[B]])
@@ -542,16 +605,25 @@ define void @test_deep_and_chain(i1 %a1) {
; CHECK-NEXT: [[A13:%.*]] = and i1 [[A12]], true
; CHECK-NEXT: [[A14:%.*]] = and i1 [[A13]], true
; CHECK-NEXT: [[A15:%.*]] = and i1 [[A14]], true
-; CHECK: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1
-; CHECK: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1
-; CHECK: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1
-; CHECK: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1
-; CHECK: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1
-; CHECK: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1
-; CHECK: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1
-; CHECK: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1
-; CHECK: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1
-; CHECK-NEXT: br i1 [[A15]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A15]] = and i1 [[A14]], true Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[A15]] }
+; CHECK-NEXT: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A15]] = and i1 [[A14]], true Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[A15]] }
+; CHECK-NEXT: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A14]] = and i1 [[A13]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A14]] }
+; CHECK-NEXT: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A13]] = and i1 [[A12]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A13]] }
+; CHECK-NEXT: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A12]] = and i1 [[A11]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A12]] }
+; CHECK-NEXT: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A11]] = and i1 [[A10]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A11]] }
+; CHECK-NEXT: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A10]] = and i1 [[A9]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A10]] }
+; CHECK-NEXT: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A9]] = and i1 [[A8]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A9]] }
+; CHECK-NEXT: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A8]] = and i1 [[A7]], true Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A8]] }
+; CHECK-NEXT: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1
+; CHECK-NEXT: br i1 [[A15]], label [[IF]], label [[ELSE]]
; CHECK: if:
; CHECK-NEXT: call void @foo(i1 [[A1]])
; CHECK-NEXT: call void @foo(i1 [[A2]])
@@ -656,16 +728,25 @@ define void @test_deep_and_tree(i1 %a1) {
; CHECK-NEXT: [[A13:%.*]] = and i1 [[A12]], [[A12]]
; CHECK-NEXT: [[A14:%.*]] = and i1 [[A13]], [[A13]]
; CHECK-NEXT: [[A15:%.*]] = and i1 [[A14]], [[A14]]
-; CHECK: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1
-; CHECK: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1
-; CHECK: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1
-; CHECK: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1
-; CHECK: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1
-; CHECK: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1
-; CHECK: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1
-; CHECK: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1
-; CHECK: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1
-; CHECK-NEXT: br i1 [[A15]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A15]] = and i1 [[A14]], [[A14]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[A15]] }
+; CHECK-NEXT: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A15]] = and i1 [[A14]], [[A14]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[A15]] }
+; CHECK-NEXT: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A14]] = and i1 [[A13]], [[A13]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A14]] }
+; CHECK-NEXT: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A13]] = and i1 [[A12]], [[A12]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A13]] }
+; CHECK-NEXT: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A12]] = and i1 [[A11]], [[A11]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A12]] }
+; CHECK-NEXT: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A11]] = and i1 [[A10]], [[A10]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A11]] }
+; CHECK-NEXT: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A10]] = and i1 [[A9]], [[A9]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A10]] }
+; CHECK-NEXT: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A9]] = and i1 [[A8]], [[A8]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A9]] }
+; CHECK-NEXT: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A8]] = and i1 [[A7]], [[A7]] Edge: [label [[TMP0]],label [[IF]]], RenamedOp: [[A8]] }
+; CHECK-NEXT: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1
+; CHECK-NEXT: br i1 [[A15]], label [[IF]], label [[ELSE]]
; CHECK: if:
; CHECK-NEXT: call void @foo(i1 [[A1]])
; CHECK-NEXT: call void @foo(i1 [[A2]])
@@ -770,16 +851,25 @@ define void @test_deep_or_tree(i1 %a1) {
; CHECK-NEXT: [[A13:%.*]] = or i1 [[A12]], [[A12]]
; CHECK-NEXT: [[A14:%.*]] = or i1 [[A13]], [[A13]]
; CHECK-NEXT: [[A15:%.*]] = or i1 [[A14]], [[A14]]
-; CHECK: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1
-; CHECK: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1
-; CHECK: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1
-; CHECK: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1
-; CHECK: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1
-; CHECK: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1
-; CHECK: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1
-; CHECK: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1
-; CHECK: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1
-; CHECK-NEXT: br i1 [[A15]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[A15]] = or i1 [[A14]], [[A14]] Edge: [label [[TMP0:%.*]],label [[IF:%.*]]], RenamedOp: [[A15]] }
+; CHECK-NEXT: [[A15_0:%.*]] = bitcast i1 [[A15]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A15]] = or i1 [[A14]], [[A14]] Edge: [label [[TMP0]],label [[ELSE:%.*]]], RenamedOp: [[A15]] }
+; CHECK-NEXT: [[A15_1:%.*]] = bitcast i1 [[A15]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A14]] = or i1 [[A13]], [[A13]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A14]] }
+; CHECK-NEXT: [[A14_0:%.*]] = bitcast i1 [[A14]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A13]] = or i1 [[A12]], [[A12]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A13]] }
+; CHECK-NEXT: [[A13_0:%.*]] = bitcast i1 [[A13]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A12]] = or i1 [[A11]], [[A11]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A12]] }
+; CHECK-NEXT: [[A12_0:%.*]] = bitcast i1 [[A12]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A11]] = or i1 [[A10]], [[A10]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A11]] }
+; CHECK-NEXT: [[A11_0:%.*]] = bitcast i1 [[A11]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A10]] = or i1 [[A9]], [[A9]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A10]] }
+; CHECK-NEXT: [[A10_0:%.*]] = bitcast i1 [[A10]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A9]] = or i1 [[A8]], [[A8]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A9]] }
+; CHECK-NEXT: [[A9_0:%.*]] = bitcast i1 [[A9]] to i1
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 0 Comparison: [[A8]] = or i1 [[A7]], [[A7]] Edge: [label [[TMP0]],label [[ELSE]]], RenamedOp: [[A8]] }
+; CHECK-NEXT: [[A8_0:%.*]] = bitcast i1 [[A8]] to i1
+; CHECK-NEXT: br i1 [[A15]], label [[IF]], label [[ELSE]]
; CHECK: if:
; CHECK-NEXT: call void @foo(i1 [[A1]])
; CHECK-NEXT: call void @foo(i1 [[A2]])
@@ -873,11 +963,16 @@ define void @test_assume_and_chain(i1 %a, i1 %b, i1 %c) {
; CHECK-NEXT: [[AND1:%.*]] = and i1 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C:%.*]]
; CHECK-NEXT: call void @llvm.assume(i1 [[AND2]])
-; CHECK: [[TMP1:%.*]] = bitcast i1 [[C]] to i1
-; CHECK: [[TMP2:%.*]] = bitcast i1 [[B]] to i1
-; CHECK: [[TMP3:%.*]] = bitcast i1 [[A]] to i1
-; CHECK: [[TMP4:%.*]] = bitcast i1 [[AND1]] to i1
-; CHECK: [[TMP5:%.*]] = bitcast i1 [[AND2]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison:i1 [[C]], RenamedOp: [[C]] }
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[C]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison:i1 [[B]], RenamedOp: [[B]] }
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[B]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison:i1 [[A]], RenamedOp: [[A]] }
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i1 [[A]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[AND1]] = and i1 [[A]], [[B]], RenamedOp: [[AND1]] }
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i1 [[AND1]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[AND2]] = and i1 [[AND1]], [[C]], RenamedOp: [[AND2]] }
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i1 [[AND2]] to i1
; CHECK-NEXT: call void @foo(i1 [[TMP3]])
; CHECK-NEXT: call void @foo(i1 [[TMP2]])
; CHECK-NEXT: call void @foo(i1 [[TMP1]])
@@ -901,7 +996,8 @@ define void @test_assume_or_chain(i1 %a, i1 %b, i1 %c) {
; CHECK-NEXT: [[OR1:%.*]] = or i1 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C:%.*]]
; CHECK-NEXT: call void @llvm.assume(i1 [[OR2]])
-; CHECK: [[TMP1:%.*]] = bitcast i1 [[OR2]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[OR2]] = or i1 [[OR1]], [[C]], RenamedOp: [[OR2]] }
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[OR2]] to i1
; CHECK-NEXT: call void @foo(i1 [[A]])
; CHECK-NEXT: call void @foo(i1 [[B]])
; CHECK-NEXT: call void @foo(i1 [[C]])
@@ -937,14 +1033,22 @@ define void @test_assume_deep_and_tree(i1 %a1) {
; CHECK-NEXT: [[A14:%.*]] = and i1 [[A13]], [[A13]]
; CHECK-NEXT: [[A15:%.*]] = and i1 [[A14]], [[A14]]
; CHECK-NEXT: call void @llvm.assume(i1 [[A15]])
-; CHECK: [[TMP1:%.*]] = bitcast i1 [[A8]] to i1
-; CHECK: [[TMP2:%.*]] = bitcast i1 [[A9]] to i1
-; CHECK: [[TMP3:%.*]] = bitcast i1 [[A10]] to i1
-; CHECK: [[TMP4:%.*]] = bitcast i1 [[A11]] to i1
-; CHECK: [[TMP5:%.*]] = bitcast i1 [[A12]] to i1
-; CHECK: [[TMP6:%.*]] = bitcast i1 [[A13]] to i1
-; CHECK: [[TMP7:%.*]] = bitcast i1 [[A14]] to i1
-; CHECK: [[TMP8:%.*]] = bitcast i1 [[A15]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[A8]] = and i1 [[A7]], [[A7]], RenamedOp: [[A8]] }
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[A8]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[A9]] = and i1 [[A8]], [[A8]], RenamedOp: [[A9]] }
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[A9]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[A10]] = and i1 [[A9]], [[A9]], RenamedOp: [[A10]] }
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i1 [[A10]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[A11]] = and i1 [[A10]], [[A10]], RenamedOp: [[A11]] }
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i1 [[A11]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[A12]] = and i1 [[A11]], [[A11]], RenamedOp: [[A12]] }
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i1 [[A12]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[A13]] = and i1 [[A12]], [[A12]], RenamedOp: [[A13]] }
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i1 [[A13]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[A14]] = and i1 [[A13]], [[A13]], RenamedOp: [[A14]] }
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i1 [[A14]] to i1
+; CHECK-NEXT: ; assume predicate info { Comparison: [[A15]] = and i1 [[A14]], [[A14]], RenamedOp: [[A15]] }
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i1 [[A15]] to i1
; CHECK-NEXT: call void @foo(i1 [[A1]])
; CHECK-NEXT: call void @foo(i1 [[A2]])
; CHECK-NEXT: call void @foo(i1 [[A3]])
@@ -1001,13 +1105,15 @@ define i32 @test_and_with_phinode(i32 %x) {
; CHECK-NEXT: [[XGE1:%.*]] = icmp uge i32 [[X:%.*]], 1
; CHECK-NEXT: [[XLT2:%.*]] = icmp ult i32 [[X]], 2
; CHECK-NEXT: [[AND:%.*]] = and i1 [[XGE1]], [[XLT2]]
-; CHECK: [[X_0_1:%.*]] = bitcast i32 [[X]] to i32
-; CHECK: [[X_0_2:%.*]] = bitcast i32 [[X_0_1]] to i32
-; CHECK-NEXT: br i1 [[AND]], label [[PHI:%.*]], label [[NOPE:%.*]]
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XGE1]] = icmp uge i32 [[X]], 1 Edge: [label [[ENTRY:%.*]],label [[PHI:%.*]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0_1:%.*]] = bitcast i32 [[X]] to i32
+; CHECK-NEXT: ; branch predicate info { TrueEdge: 1 Comparison: [[XLT2]] = icmp ult i32 [[X]], 2 Edge: [label [[ENTRY]],label [[PHI]]], RenamedOp: [[X]] }
+; CHECK-NEXT: [[X_0_2:%.*]] = bitcast i32 [[X_0_1]] to i32
+; CHECK-NEXT: br i1 [[AND]], label [[PHI]], label [[NOPE:%.*]]
; CHECK: nope:
; CHECK-NEXT: br label [[PHI]]
; CHECK: phi:
-; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[X_0_2]], [[ENTRY:%.*]] ], [ 1, [[NOPE]] ]
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[X_0_2]], [[ENTRY]] ], [ 1, [[NOPE]] ]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 781240a..11a5a57 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -753,10 +753,17 @@ if not hasattr(sys, "getwindowsversion") or sys.getwindowsversion().build >= 170
config.available_features.add("unix-sockets")
# .debug_frame is not emitted for targeting Windows x64, aarch64/arm64, AIX, or Apple Silicon Mac.
-if not re.match(
- r"^(x86_64|aarch64|arm64|powerpc|powerpc64).*-(windows-cygnus|windows-gnu|windows-msvc|aix)",
- config.target_triple,
-) and not re.match(r"^arm64(e)?-apple-(macos|darwin)", config.target_triple):
+if (
+ not re.match(
+ r"^(x86_64|aarch64|arm64|powerpc|powerpc64).*-(windows-cygnus|windows-gnu|windows-msvc|aix)",
+ config.target_triple,
+ )
+ and not re.match(
+ r"^arm64(e)?-apple-(macos|darwin)",
+ config.target_triple,
+ )
+ and not re.match(r".*-zos.*", config.target_triple)
+):
config.available_features.add("debug_frame")
if config.enable_backtrace:
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll
new file mode 100644
index 0000000..bfd216d
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -S | FileCheck %s
+
+; Test whether UTC checks empty lines instead of skipping them.
+define i32 @test(i32 %x) {
+entry:
+ br label %block1
+
+block1:
+ %cmp = icmp eq i32 %x, 0
+ br i1 %cmp, label %block2, label %exit1
+
+block2:
+ br i1 %cmp, label %block3, label %exit2
+
+block3:
+ br i1 %cmp, label %exit3, label %exit4
+
+exit1:
+ ret i32 0
+
+exit2:
+ ret i32 %x
+
+exit3:
+ ret i32 %x
+
+exit4:
+ ret i32 %x
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll.expected
new file mode 100644
index 0000000..c5f822d
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_empty.ll.expected
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 7
+; RUN: opt < %s -S | FileCheck %s
+
+; Test whether UTC checks empty lines instead of skipping them.
+define i32 @test(i32 %x) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[BLOCK1:.*]]
+; CHECK-EMPTY:
+; CHECK-NEXT: [[BLOCK1]]:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
+; CHECK-EMPTY:
+; CHECK-NEXT: [[BLOCK2]]:
+; CHECK-NEXT: br i1 [[CMP]], label %[[BLOCK3:.*]], label %[[EXIT2:.*]]
+; CHECK-EMPTY:
+; CHECK-NEXT: [[BLOCK3]]:
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT3:.*]], label %[[EXIT4:.*]]
+; CHECK-EMPTY:
+; CHECK-NEXT: [[EXIT1]]:
+; CHECK-NEXT: ret i32 0
+; CHECK-EMPTY:
+; CHECK-NEXT: [[EXIT2]]:
+; CHECK-NEXT: ret i32 [[X]]
+; CHECK-EMPTY:
+; CHECK-NEXT: [[EXIT3]]:
+; CHECK-NEXT: ret i32 [[X]]
+; CHECK-EMPTY:
+; CHECK-NEXT: [[EXIT4]]:
+; CHECK-NEXT: ret i32 [[X]]
+;
+entry:
+ br label %block1
+
+block1:
+ %cmp = icmp eq i32 %x, 0
+ br i1 %cmp, label %block2, label %exit1
+
+block2:
+ br i1 %cmp, label %block3, label %exit2
+
+block3:
+ br i1 %cmp, label %exit3, label %exit4
+
+exit1:
+ ret i32 0
+
+exit2:
+ ret i32 %x
+
+exit3:
+ ret i32 %x
+
+exit4:
+ ret i32 %x
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected
index b1977e7..8cab0bb 100644
--- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected
@@ -12,13 +12,17 @@ define i8 @testi8(i8 %x) {
; CHECK-NEXT: i8 2, label %[[CASE3:.*]]
; CHECK-NEXT: i8 3, label %[[CASE3]]
; CHECK-NEXT: ]
-; CHECK: [[DEFAULT]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[DEFAULT]]:
; CHECK-NEXT: ret i8 0
-; CHECK: [[CASE1]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE1]]:
; CHECK-NEXT: ret i8 1
-; CHECK: [[CASE2]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE2]]:
; CHECK-NEXT: ret i8 2
-; CHECK: [[CASE3]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE3]]:
; CHECK-NEXT: ret i8 3
;
switch i8 %x, label %default [
@@ -46,13 +50,17 @@ define i32 @testi32(i32 %x) {
; CHECK-NEXT: i32 2, label %[[CASE3:.*]]
; CHECK-NEXT: i32 3, label %[[CASE3]]
; CHECK-NEXT: ]
-; CHECK: [[DEFAULT]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[DEFAULT]]:
; CHECK-NEXT: ret i32 0
-; CHECK: [[CASE1]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE1]]:
; CHECK-NEXT: ret i32 1
-; CHECK: [[CASE2]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE2]]:
; CHECK-NEXT: ret i32 2
-; CHECK: [[CASE3]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE3]]:
; CHECK-NEXT: ret i32 3
;
switch i32 %x, label %default [
@@ -80,13 +88,17 @@ define i128 @testi128(i128 %x) {
; CHECK-NEXT: i128 2, label %[[CASE3:.*]]
; CHECK-NEXT: i128 3, label %[[CASE3]]
; CHECK-NEXT: ]
-; CHECK: [[DEFAULT]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[DEFAULT]]:
; CHECK-NEXT: ret i128 0
-; CHECK: [[CASE1]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE1]]:
; CHECK-NEXT: ret i128 1
-; CHECK: [[CASE2]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE2]]:
; CHECK-NEXT: ret i128 2
-; CHECK: [[CASE3]]:
+; CHECK-EMPTY:
+; CHECK-NEXT: [[CASE3]]:
; CHECK-NEXT: ret i128 3
;
switch i128 %x, label %default [
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/check_empty.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/check_empty.test
new file mode 100644
index 0000000..670bda2
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/check_empty.test
@@ -0,0 +1,3 @@
+## Test whether UTC generates CHECK-EMPTY for blank lines
+# RUN: cp -f %S/Inputs/check_empty.ll %t.ll && %update_test_checks %t.ll --version 7
+# RUN: diff -u %t.ll %S/Inputs/check_empty.ll.expected
diff --git a/llvm/test/tools/llvm-config/paths.test b/llvm/test/tools/llvm-config/paths.test
index 419f155..61d86f7 100644
--- a/llvm/test/tools/llvm-config/paths.test
+++ b/llvm/test/tools/llvm-config/paths.test
@@ -4,18 +4,34 @@ RUN: llvm-config --bindir 2>&1 | FileCheck --check-prefix=CHECK-BINDIR %s
CHECK-BINDIR: {{.*}}{{/|\\}}bin
CHECK-BINDIR-NOT: error:
CHECK-BINDIR-NOT: warning
+RUN: llvm-config --bindir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-BINDIR2 %s
+CHECK-BINDIR2: {{.*}}{{/|\\\\}}bin
+CHECK-BINDIR2-NOT: error:
+CHECK-BINDIR2-NOT: warning
RUN: llvm-config --includedir 2>&1 | FileCheck --check-prefix=CHECK-INCLUDEDIR %s
CHECK-INCLUDEDIR: {{.*}}{{/|\\}}include
CHECK-INCLUDEDIR-NOT: error:
CHECK-INCLUDEDIR-NOT: warning
+RUN: llvm-config --includedir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-INCLUDEDIR2 %s
+CHECK-INCLUDEDIR2: {{.*}}{{/|\\\\}}include
+CHECK-INCLUDEDIR2-NOT: error:
+CHECK-INCLUDEDIR2-NOT: warning
RUN: llvm-config --libdir 2>&1 | FileCheck --check-prefix=CHECK-LIBDIR %s
CHECK-LIBDIR: {{.*}}{{/|\\}}lib{{.*}}
CHECK-LIBDIR-NOT: error:
CHECK-LIBDIR-NOT: warning
+RUN: llvm-config --libdir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-LIBDIR2 %s
+CHECK-LIBDIR2: {{.*}}{{/|\\\\}}lib{{.*}}
+CHECK-LIBDIR2-NOT: error:
+CHECK-LIBDIR2-NOT: warning
RUN: llvm-config --cmakedir 2>&1 | FileCheck --check-prefix=CHECK-CMAKEDIR %s
CHECK-CMAKEDIR: {{.*}}{{/|\\}}cmake{{/|\\}}llvm
CHECK-CMAKEDIR-NOT: error:
CHECK-CMAKEDIR-NOT: warning
+RUN: llvm-config --cmakedir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-CMAKEDIR2 %s
+CHECK-CMAKEDIR2: {{.*}}{{/|\\\\}}cmake{{/|\\\\}}llvm
+CHECK-CMAKEDIR2-NOT: error:
+CHECK-CMAKEDIR2-NOT: warning
diff --git a/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s b/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s
new file mode 100644
index 0000000..6c38791
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s
@@ -0,0 +1,126 @@
+# Checks that we correctly display the DW_AT_APPLE_property_name of a
+# referenced DW_TAG_APPLE_property.
+#
+# RUN: llvm-mc -triple=aarch64--darwin -filetype=obj -o %t.o < %s
+# RUN: not llvm-dwarfdump %t.o 2> %t.errs.txt | FileCheck %s
+# RUN: FileCheck %s --check-prefix=ERRORS < %t.errs.txt
+
+# CHECK: 0x[[PROP_REF:[0-9a-f]+]]: DW_TAG_APPLE_property
+# CHECK-NEXT: DW_AT_APPLE_property_name ("autoSynthProp")
+#
+# CHECK: 0x[[NO_NAME_PROP:[0-9a-f]+]]: DW_TAG_APPLE_property
+# CHECK-NOT: DW_AT_APPLE_property_name
+#
+# CHECK: 0x[[INVALID_STRP:[0-9a-f]+]]: DW_TAG_APPLE_property
+# CHECK-NEXT: DW_AT_APPLE_property_name
+#
+# CHECK: DW_TAG_member
+# CHECK: DW_AT_APPLE_property (0x[[PROP_REF]] "autoSynthProp")
+# CHECK: DW_AT_APPLE_property (0x[[NO_NAME_PROP]] "")
+# CHECK: DW_AT_APPLE_property (0x{{.*}})
+# CHECK: DW_AT_APPLE_property (0x{{.*}})
+# CHECK: DW_AT_APPLE_property (0x[[INVALID_STRP]])
+
+# ERRORS: error: decoding DW_AT_APPLE_property_name: not referencing a DW_TAG_APPLE_property
+# ERRORS: error: decoding DW_AT_APPLE_property_name: invalid DIE
+# ERRORS: error: decoding DW_AT_APPLE_property_name: DW_FORM_strp offset 102 is beyond .debug_str bounds
+
+ .section __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+ .byte 1 ; Abbreviation Code
+ .byte 17 ; DW_TAG_compile_unit
+ .byte 1 ; DW_CHILDREN_yes
+ .byte 114 ; DW_AT_str_offsets_base
+ .byte 23 ; DW_FORM_sec_offset
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 2 ; Abbreviation Code
+ .byte 19 ; DW_TAG_structure_type
+ .byte 1 ; DW_CHILDREN_yes
+ .byte 3 ; DW_AT_name
+ .byte 37 ; DW_FORM_strx1
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 3 ; Abbreviation Code
+ .ascii "\200\204\001" ; DW_TAG_APPLE_property
+ .byte 0 ; DW_CHILDREN_no
+ .ascii "\350\177" ; DW_AT_APPLE_property_name
+ .byte 37 ; DW_FORM_strx1
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 4 ; Abbreviation Code
+ .ascii "\200\204\001" ; DW_TAG_APPLE_property
+ .byte 0 ; DW_CHILDREN_no
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 5 ; Abbreviation Code
+ .ascii "\200\204\001" ; DW_TAG_APPLE_property
+ .byte 0 ; DW_CHILDREN_no
+ .ascii "\350\177" ; DW_AT_APPLE_property_name
+ .byte 14 ; DW_FORM_strp
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 6 ; Abbreviation Code
+ .byte 13 ; DW_TAG_member
+ .byte 0 ; DW_CHILDREN_no
+ .byte 3 ; DW_AT_name
+ .byte 37 ; DW_FORM_strx1
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 0 ; EOM(3)
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+Lcu_begin0:
+Lset0 = Ldebug_info_end0-Ldebug_info_start0 ; Length of Unit
+ .long Lset0
+Ldebug_info_start0:
+ .short 5 ; DWARF version number
+ .byte 1 ; DWARF Unit Type
+ .byte 8 ; Address Size (in bytes)
+Lset1 = Lsection_abbrev-Lsection_abbrev ; Offset Into Abbrev. Section
+ .long Lset1
+ .byte 1 ; Abbrev [1] DW_TAG_compile_unit
+Lset2 = Lstr_offsets_base0-Lsection_str_off ; DW_AT_str_offsets_base
+ .long Lset2
+ .byte 2 ; Abbrev [2] DW_TAG_structure_type
+ .byte 2 ; DW_AT_name
+ .byte 3 ; Abbrev [3] DW_TAG_APPLE_property
+ .byte 0 ; DW_AT_APPLE_property_name
+ .byte 4 ; Abbrev [4] DW_TAG_APPLE_property
+ .byte 5 ; Abbrev [5] DW_TAG_APPLE_property
+ .long 102 ; DW_AT_APPLE_property_name
+ .byte 6 ; Abbrev [6] DW_TAG_member
+ .byte 1 ; DW_AT_name
+ .long 19 ; DW_AT_APPLE_property
+ .long 21 ; DW_AT_APPLE_property
+ .long 17 ; DW_AT_APPLE_property
+ .long 0 ; DW_AT_APPLE_property
+ .long 22 ; DW_AT_APPLE_property
+ .byte 0 ; End Of Children Mark
+ .byte 0 ; End Of Children Mark
+Ldebug_info_end0:
+ .section __DWARF,__debug_str_offs,regular,debug
+Lsection_str_off:
+ .long 16 ; Length of String Offsets Set
+ .short 5
+ .short 0
+Lstr_offsets_base0:
+ .section __DWARF,__debug_str,regular,debug
+Linfo_string:
+ .asciz "autoSynthProp" ; string offset=0
+ .asciz "_var" ; string offset=14
+ .asciz "Foo" ; string offset=19
+ .section __DWARF,__debug_str_offs,regular,debug
+ .long 0
+ .long 14
+ .long 19
diff --git a/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt b/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt
index dfbac4c..141a56a 100644
--- a/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt
+++ b/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt
@@ -1,33 +1,33 @@
MAX_RELATION=4
-187 7072 1
-187 6968 2
+187 7051 1
+187 6948 2
187 187 0
-187 7072 1
-187 6969 2
+187 7051 1
+187 6949 2
187 10 0
-10 7072 1
-10 7072 2
-10 7072 3
-10 6961 4
+10 7051 1
+10 7051 2
+10 7051 3
+10 6941 4
10 187 0
-187 6952 1
-187 7072 2
-187 1555 0
-1555 6882 1
-1555 6952 2
-187 7072 1
-187 6968 2
+187 6932 1
+187 7051 2
+187 1543 0
+1543 6862 1
+1543 6932 2
+187 7051 1
+187 6948 2
187 187 0
-187 7072 1
-187 6969 2
+187 7051 1
+187 6949 2
187 601 0
-601 7072 1
-601 7072 2
-601 7072 3
-601 6961 4
+601 7051 1
+601 7051 2
+601 7051 3
+601 6941 4
601 187 0
-187 6952 1
-187 7072 2
-187 1555 0
-1555 6882 1
-1555 6952 2
+187 6932 1
+187 7051 2
+187 1543 0
+1543 6862 1
+1543 6932 2
diff --git a/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt b/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt
index dc436d1..dbbbbc7 100644
--- a/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt
+++ b/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt
@@ -1,4 +1,4 @@
-7173
+7151
AAA 0
AAD 1
AADD 2
@@ -1440,5735 +1440,5713 @@ PSUBWrm 1437
PSUBWrr 1438
PSWAPDrm 1439
PSWAPDrr 1440
-PT 1441
-PTCMMIMFP 1442
-PTCMMRLFP 1443
-PTCONJTCMMIMFP 1444
-PTCONJTFP 1445
-PTCVTROWD 1446
-PTCVTROWPS 1447
-PTDPBF 1448
-PTDPBHF 1449
-PTDPBSSD 1450
-PTDPBSSDV 1451
-PTDPBSUD 1452
-PTDPBSUDV 1453
-PTDPBUSD 1454
-PTDPBUSDV 1455
-PTDPBUUD 1456
-PTDPBUUDV 1457
-PTDPFP 1458
-PTDPHBF 1459
-PTDPHF 1460
-PTESTrm 1461
-PTESTrr 1462
-PTILELOADD 1463
-PTILELOADDRS 1464
-PTILELOADDRST 1465
-PTILELOADDRSV 1466
-PTILELOADDT 1467
-PTILELOADDV 1468
-PTILEMOVROWrre 1469
-PTILEMOVROWrreV 1470
-PTILEMOVROWrri 1471
-PTILEMOVROWrriV 1472
-PTILEPAIRLOAD 1473
-PTILEPAIRSTORE 1474
-PTILESTORED 1475
-PTILESTOREDV 1476
-PTILEZERO 1477
-PTILEZEROV 1478
-PTMMULTF 1479
-PTTCMMIMFP 1480
-PTTCMMRLFP 1481
-PTTDPBF 1482
-PTTDPFP 1483
-PTTMMULTF 1484
-PTTRANSPOSED 1485
-PTTRANSPOSEDV 1486
-PTWRITE 1487
-PTWRITEm 1488
-PTWRITEr 1489
-PUNPCKHBWrm 1490
-PUNPCKHBWrr 1491
-PUNPCKHDQrm 1492
-PUNPCKHDQrr 1493
-PUNPCKHQDQrm 1494
-PUNPCKHQDQrr 1495
-PUNPCKHWDrm 1496
-PUNPCKHWDrr 1497
-PUNPCKLBWrm 1498
-PUNPCKLBWrr 1499
-PUNPCKLDQrm 1500
-PUNPCKLDQrr 1501
-PUNPCKLQDQrm 1502
-PUNPCKLQDQrr 1503
-PUNPCKLWDrm 1504
-PUNPCKLWDrr 1505
-PUSH 1506
-PUSHA 1507
-PUSHCS 1508
-PUSHDS 1509
-PUSHES 1510
-PUSHF 1511
-PUSHFS 1512
-PUSHGS 1513
-PUSHP 1514
-PUSHSS 1515
-PVALIDATE 1516
-PXORrm 1517
-PXORrr 1518
-RCL 1519
-RCPPSm 1520
-RCPPSr 1521
-RCPSSm 1522
-RCPSSm_Int 1523
-RCPSSr 1524
-RCPSSr_Int 1525
-RCR 1526
-RDFLAGS 1527
-RDFSBASE 1528
-RDGSBASE 1529
-RDMSR 1530
-RDMSRLIST 1531
-RDMSRri 1532
-RDMSRri_EVEX 1533
-RDPID 1534
-RDPKRUr 1535
-RDPMC 1536
-RDPRU 1537
-RDRAND 1538
-RDSEED 1539
-RDSSPD 1540
-RDSSPQ 1541
-RDTSC 1542
-RDTSCP 1543
-REG_SEQUENCE 1544
-REPNE_PREFIX 1545
-REP_MOVSB 1546
-REP_MOVSD 1547
-REP_MOVSQ 1548
-REP_MOVSW 1549
-REP_PREFIX 1550
-REP_STOSB 1551
-REP_STOSD 1552
-REP_STOSQ 1553
-REP_STOSW 1554
-RET 1555
-RETI 1556
-REX 1557
-RMPADJUST 1558
-RMPQUERY 1559
-RMPUPDATE 1560
-ROL 1561
-ROR 1562
-RORX 1563
-ROUNDPDmi 1564
-ROUNDPDri 1565
-ROUNDPSmi 1566
-ROUNDPSri 1567
-ROUNDSDmi 1568
-ROUNDSDmi_Int 1569
-ROUNDSDri 1570
-ROUNDSDri_Int 1571
-ROUNDSSmi 1572
-ROUNDSSmi_Int 1573
-ROUNDSSri 1574
-ROUNDSSri_Int 1575
-RSM 1576
-RSQRTPSm 1577
-RSQRTPSr 1578
-RSQRTSSm 1579
-RSQRTSSm_Int 1580
-RSQRTSSr 1581
-RSQRTSSr_Int 1582
-RSTORSSP 1583
-SAHF 1584
-SALC 1585
-SAR 1586
-SARX 1587
-SAVEPREVSSP 1588
-SBB 1589
-SCASB 1590
-SCASL 1591
-SCASQ 1592
-SCASW 1593
-SEAMCALL 1594
-SEAMOPS 1595
-SEAMRET 1596
-SEG_ALLOCA 1597
-SEH_BeginEpilogue 1598
-SEH_EndEpilogue 1599
-SEH_EndPrologue 1600
-SEH_PushFrame 1601
-SEH_PushReg 1602
-SEH_SaveReg 1603
-SEH_SaveXMM 1604
-SEH_SetFrame 1605
-SEH_StackAlign 1606
-SEH_StackAlloc 1607
-SEH_UnwindV 1608
-SEH_UnwindVersion 1609
-SENDUIPI 1610
-SERIALIZE 1611
-SETB_C 1612
-SETCCm 1613
-SETCCm_EVEX 1614
-SETCCr 1615
-SETCCr_EVEX 1616
-SETSSBSY 1617
-SETZUCCm 1618
-SETZUCCr 1619
-SFENCE 1620
-SGDT 1621
-SHA 1622
-SHL 1623
-SHLD 1624
-SHLDROT 1625
-SHLX 1626
-SHR 1627
-SHRD 1628
-SHRDROT 1629
-SHRX 1630
-SHUFPDrmi 1631
-SHUFPDrri 1632
-SHUFPSrmi 1633
-SHUFPSrri 1634
-SIDT 1635
-SKINIT 1636
-SLDT 1637
-SLWPCB 1638
-SMSW 1639
-SQRTPDm 1640
-SQRTPDr 1641
-SQRTPSm 1642
-SQRTPSr 1643
-SQRTSDm 1644
-SQRTSDm_Int 1645
-SQRTSDr 1646
-SQRTSDr_Int 1647
-SQRTSSm 1648
-SQRTSSm_Int 1649
-SQRTSSr 1650
-SQRTSSr_Int 1651
-SQRT_F 1652
-SQRT_Fp 1653
-SS_PREFIX 1654
-STAC 1655
-STACKALLOC_W_PROBING 1656
-STACKMAP 1657
-STATEPOINT 1658
-STC 1659
-STD 1660
-STGI 1661
-STI 1662
-STMXCSR 1663
-STOSB 1664
-STOSL 1665
-STOSQ 1666
-STOSW 1667
-STR 1668
-STRm 1669
-STTILECFG 1670
-STTILECFG_EVEX 1671
-STUI 1672
-ST_F 1673
-ST_FP 1674
-ST_FPrr 1675
-ST_Fp 1676
-ST_FpP 1677
-ST_Frr 1678
-SUB 1679
-SUBPDrm 1680
-SUBPDrr 1681
-SUBPSrm 1682
-SUBPSrr 1683
-SUBREG_TO_REG 1684
-SUBR_F 1685
-SUBR_FI 1686
-SUBR_FPrST 1687
-SUBR_FST 1688
-SUBR_Fp 1689
-SUBR_FpI 1690
-SUBR_FrST 1691
-SUBSDrm 1692
-SUBSDrm_Int 1693
-SUBSDrr 1694
-SUBSDrr_Int 1695
-SUBSSrm 1696
-SUBSSrm_Int 1697
-SUBSSrr 1698
-SUBSSrr_Int 1699
-SUB_F 1700
-SUB_FI 1701
-SUB_FPrST 1702
-SUB_FST 1703
-SUB_Fp 1704
-SUB_FpI 1705
-SUB_FrST 1706
-SWAPGS 1707
-SYSCALL 1708
-SYSENTER 1709
-SYSEXIT 1710
-SYSRET 1711
-T 1712
-TAILJMPd 1713
-TAILJMPd_CC 1714
-TAILJMPm 1715
-TAILJMPr 1716
-TCMMIMFP 1717
-TCMMRLFP 1718
-TCONJTCMMIMFP 1719
-TCONJTFP 1720
-TCRETURN_HIPE 1721
-TCRETURN_WIN 1722
-TCRETURN_WINmi 1723
-TCRETURNdi 1724
-TCRETURNdicc 1725
-TCRETURNmi 1726
-TCRETURNri 1727
-TCVTROWD 1728
-TCVTROWPS 1729
-TDCALL 1730
-TDPBF 1731
-TDPBHF 1732
-TDPBSSD 1733
-TDPBSUD 1734
-TDPBUSD 1735
-TDPBUUD 1736
-TDPFP 1737
-TDPHBF 1738
-TDPHF 1739
-TEST 1740
-TESTUI 1741
-TILELOADD 1742
-TILELOADDRS 1743
-TILELOADDRST 1744
-TILELOADDRS_EVEX 1745
-TILELOADDT 1746
-TILELOADD_EVEX 1747
-TILEMOVROWrre 1748
-TILEMOVROWrri 1749
-TILERELEASE 1750
-TILESTORED 1751
-TILESTORED_EVEX 1752
-TILEZERO 1753
-TLBSYNC 1754
-TLSCall 1755
-TLS_addr 1756
-TLS_addrX 1757
-TLS_base_addr 1758
-TLS_base_addrX 1759
-TLS_desc 1760
-TMMULTF 1761
-TPAUSE 1762
-TRAP 1763
-TST_F 1764
-TST_Fp 1765
-TTCMMIMFP 1766
-TTCMMRLFP 1767
-TTDPBF 1768
-TTDPFP 1769
-TTMMULTF 1770
-TTRANSPOSED 1771
-TZCNT 1772
-TZMSK 1773
-UBSAN_UD 1774
-UCOMISDrm 1775
-UCOMISDrm_Int 1776
-UCOMISDrr 1777
-UCOMISDrr_Int 1778
-UCOMISSrm 1779
-UCOMISSrm_Int 1780
-UCOMISSrr 1781
-UCOMISSrr_Int 1782
-UCOM_FIPr 1783
-UCOM_FIr 1784
-UCOM_FPPr 1785
-UCOM_FPr 1786
-UCOM_FpIr 1787
-UCOM_Fpr 1788
-UCOM_Fr 1789
-UD 1790
-UIRET 1791
-UMONITOR 1792
-UMWAIT 1793
-UNPCKHPDrm 1794
-UNPCKHPDrr 1795
-UNPCKHPSrm 1796
-UNPCKHPSrr 1797
-UNPCKLPDrm 1798
-UNPCKLPDrr 1799
-UNPCKLPSrm 1800
-UNPCKLPSrr 1801
-URDMSRri 1802
-URDMSRri_EVEX 1803
-URDMSRrr 1804
-URDMSRrr_EVEX 1805
-UWRMSRir 1806
-UWRMSRir_EVEX 1807
-UWRMSRrr 1808
-UWRMSRrr_EVEX 1809
-V 1810
-VAARG 1811
-VAARG_X 1812
-VADDBF 1813
-VADDPDYrm 1814
-VADDPDYrr 1815
-VADDPDZ 1816
-VADDPDZrm 1817
-VADDPDZrmb 1818
-VADDPDZrmbk 1819
-VADDPDZrmbkz 1820
-VADDPDZrmk 1821
-VADDPDZrmkz 1822
-VADDPDZrr 1823
-VADDPDZrrb 1824
-VADDPDZrrbk 1825
-VADDPDZrrbkz 1826
-VADDPDZrrk 1827
-VADDPDZrrkz 1828
-VADDPDrm 1829
-VADDPDrr 1830
-VADDPHZ 1831
-VADDPHZrm 1832
-VADDPHZrmb 1833
-VADDPHZrmbk 1834
-VADDPHZrmbkz 1835
-VADDPHZrmk 1836
-VADDPHZrmkz 1837
-VADDPHZrr 1838
-VADDPHZrrb 1839
-VADDPHZrrbk 1840
-VADDPHZrrbkz 1841
-VADDPHZrrk 1842
-VADDPHZrrkz 1843
-VADDPSYrm 1844
-VADDPSYrr 1845
-VADDPSZ 1846
-VADDPSZrm 1847
-VADDPSZrmb 1848
-VADDPSZrmbk 1849
-VADDPSZrmbkz 1850
-VADDPSZrmk 1851
-VADDPSZrmkz 1852
-VADDPSZrr 1853
-VADDPSZrrb 1854
-VADDPSZrrbk 1855
-VADDPSZrrbkz 1856
-VADDPSZrrk 1857
-VADDPSZrrkz 1858
-VADDPSrm 1859
-VADDPSrr 1860
-VADDSDZrm 1861
-VADDSDZrm_Int 1862
-VADDSDZrmk_Int 1863
-VADDSDZrmkz_Int 1864
-VADDSDZrr 1865
-VADDSDZrr_Int 1866
-VADDSDZrrb_Int 1867
-VADDSDZrrbk_Int 1868
-VADDSDZrrbkz_Int 1869
-VADDSDZrrk_Int 1870
-VADDSDZrrkz_Int 1871
-VADDSDrm 1872
-VADDSDrm_Int 1873
-VADDSDrr 1874
-VADDSDrr_Int 1875
-VADDSHZrm 1876
-VADDSHZrm_Int 1877
-VADDSHZrmk_Int 1878
-VADDSHZrmkz_Int 1879
-VADDSHZrr 1880
-VADDSHZrr_Int 1881
-VADDSHZrrb_Int 1882
-VADDSHZrrbk_Int 1883
-VADDSHZrrbkz_Int 1884
-VADDSHZrrk_Int 1885
-VADDSHZrrkz_Int 1886
-VADDSSZrm 1887
-VADDSSZrm_Int 1888
-VADDSSZrmk_Int 1889
-VADDSSZrmkz_Int 1890
-VADDSSZrr 1891
-VADDSSZrr_Int 1892
-VADDSSZrrb_Int 1893
-VADDSSZrrbk_Int 1894
-VADDSSZrrbkz_Int 1895
-VADDSSZrrk_Int 1896
-VADDSSZrrkz_Int 1897
-VADDSSrm 1898
-VADDSSrm_Int 1899
-VADDSSrr 1900
-VADDSSrr_Int 1901
-VADDSUBPDYrm 1902
-VADDSUBPDYrr 1903
-VADDSUBPDrm 1904
-VADDSUBPDrr 1905
-VADDSUBPSYrm 1906
-VADDSUBPSYrr 1907
-VADDSUBPSrm 1908
-VADDSUBPSrr 1909
-VAESDECLASTYrm 1910
-VAESDECLASTYrr 1911
-VAESDECLASTZ 1912
-VAESDECLASTZrm 1913
-VAESDECLASTZrr 1914
-VAESDECLASTrm 1915
-VAESDECLASTrr 1916
-VAESDECYrm 1917
-VAESDECYrr 1918
-VAESDECZ 1919
-VAESDECZrm 1920
-VAESDECZrr 1921
-VAESDECrm 1922
-VAESDECrr 1923
-VAESENCLASTYrm 1924
-VAESENCLASTYrr 1925
-VAESENCLASTZ 1926
-VAESENCLASTZrm 1927
-VAESENCLASTZrr 1928
-VAESENCLASTrm 1929
-VAESENCLASTrr 1930
-VAESENCYrm 1931
-VAESENCYrr 1932
-VAESENCZ 1933
-VAESENCZrm 1934
-VAESENCZrr 1935
-VAESENCrm 1936
-VAESENCrr 1937
-VAESIMCrm 1938
-VAESIMCrr 1939
-VAESKEYGENASSISTrmi 1940
-VAESKEYGENASSISTrri 1941
-VALIGNDZ 1942
-VALIGNDZrmbi 1943
-VALIGNDZrmbik 1944
-VALIGNDZrmbikz 1945
-VALIGNDZrmi 1946
-VALIGNDZrmik 1947
-VALIGNDZrmikz 1948
-VALIGNDZrri 1949
-VALIGNDZrrik 1950
-VALIGNDZrrikz 1951
-VALIGNQZ 1952
-VALIGNQZrmbi 1953
-VALIGNQZrmbik 1954
-VALIGNQZrmbikz 1955
-VALIGNQZrmi 1956
-VALIGNQZrmik 1957
-VALIGNQZrmikz 1958
-VALIGNQZrri 1959
-VALIGNQZrrik 1960
-VALIGNQZrrikz 1961
-VANDNPDYrm 1962
-VANDNPDYrr 1963
-VANDNPDZ 1964
-VANDNPDZrm 1965
-VANDNPDZrmb 1966
-VANDNPDZrmbk 1967
-VANDNPDZrmbkz 1968
-VANDNPDZrmk 1969
-VANDNPDZrmkz 1970
-VANDNPDZrr 1971
-VANDNPDZrrk 1972
-VANDNPDZrrkz 1973
-VANDNPDrm 1974
-VANDNPDrr 1975
-VANDNPSYrm 1976
-VANDNPSYrr 1977
-VANDNPSZ 1978
-VANDNPSZrm 1979
-VANDNPSZrmb 1980
-VANDNPSZrmbk 1981
-VANDNPSZrmbkz 1982
-VANDNPSZrmk 1983
-VANDNPSZrmkz 1984
-VANDNPSZrr 1985
-VANDNPSZrrk 1986
-VANDNPSZrrkz 1987
-VANDNPSrm 1988
-VANDNPSrr 1989
-VANDPDYrm 1990
-VANDPDYrr 1991
-VANDPDZ 1992
-VANDPDZrm 1993
-VANDPDZrmb 1994
-VANDPDZrmbk 1995
-VANDPDZrmbkz 1996
-VANDPDZrmk 1997
-VANDPDZrmkz 1998
-VANDPDZrr 1999
-VANDPDZrrk 2000
-VANDPDZrrkz 2001
-VANDPDrm 2002
-VANDPDrr 2003
-VANDPSYrm 2004
-VANDPSYrr 2005
-VANDPSZ 2006
-VANDPSZrm 2007
-VANDPSZrmb 2008
-VANDPSZrmbk 2009
-VANDPSZrmbkz 2010
-VANDPSZrmk 2011
-VANDPSZrmkz 2012
-VANDPSZrr 2013
-VANDPSZrrk 2014
-VANDPSZrrkz 2015
-VANDPSrm 2016
-VANDPSrr 2017
-VASTART_SAVE_XMM_REGS 2018
-VBCSTNEBF 2019
-VBCSTNESH 2020
-VBLENDMPDZ 2021
-VBLENDMPDZrm 2022
-VBLENDMPDZrmb 2023
-VBLENDMPDZrmbk 2024
-VBLENDMPDZrmbkz 2025
-VBLENDMPDZrmk 2026
-VBLENDMPDZrmkz 2027
-VBLENDMPDZrr 2028
-VBLENDMPDZrrk 2029
-VBLENDMPDZrrkz 2030
-VBLENDMPSZ 2031
-VBLENDMPSZrm 2032
-VBLENDMPSZrmb 2033
-VBLENDMPSZrmbk 2034
-VBLENDMPSZrmbkz 2035
-VBLENDMPSZrmk 2036
-VBLENDMPSZrmkz 2037
-VBLENDMPSZrr 2038
-VBLENDMPSZrrk 2039
-VBLENDMPSZrrkz 2040
-VBLENDPDYrmi 2041
-VBLENDPDYrri 2042
-VBLENDPDrmi 2043
-VBLENDPDrri 2044
-VBLENDPSYrmi 2045
-VBLENDPSYrri 2046
-VBLENDPSrmi 2047
-VBLENDPSrri 2048
-VBLENDVPDYrmr 2049
-VBLENDVPDYrrr 2050
-VBLENDVPDrmr 2051
-VBLENDVPDrrr 2052
-VBLENDVPSYrmr 2053
-VBLENDVPSYrrr 2054
-VBLENDVPSrmr 2055
-VBLENDVPSrrr 2056
-VBROADCASTF 2057
-VBROADCASTI 2058
-VBROADCASTSDYrm 2059
-VBROADCASTSDYrr 2060
-VBROADCASTSDZ 2061
-VBROADCASTSDZrm 2062
-VBROADCASTSDZrmk 2063
-VBROADCASTSDZrmkz 2064
-VBROADCASTSDZrr 2065
-VBROADCASTSDZrrk 2066
-VBROADCASTSDZrrkz 2067
-VBROADCASTSSYrm 2068
-VBROADCASTSSYrr 2069
-VBROADCASTSSZ 2070
-VBROADCASTSSZrm 2071
-VBROADCASTSSZrmk 2072
-VBROADCASTSSZrmkz 2073
-VBROADCASTSSZrr 2074
-VBROADCASTSSZrrk 2075
-VBROADCASTSSZrrkz 2076
-VBROADCASTSSrm 2077
-VBROADCASTSSrr 2078
-VCMPBF 2079
-VCMPPDYrmi 2080
-VCMPPDYrri 2081
-VCMPPDZ 2082
-VCMPPDZrmbi 2083
-VCMPPDZrmbik 2084
-VCMPPDZrmi 2085
-VCMPPDZrmik 2086
-VCMPPDZrri 2087
-VCMPPDZrrib 2088
-VCMPPDZrribk 2089
-VCMPPDZrrik 2090
-VCMPPDrmi 2091
-VCMPPDrri 2092
-VCMPPHZ 2093
-VCMPPHZrmbi 2094
-VCMPPHZrmbik 2095
-VCMPPHZrmi 2096
-VCMPPHZrmik 2097
-VCMPPHZrri 2098
-VCMPPHZrrib 2099
-VCMPPHZrribk 2100
-VCMPPHZrrik 2101
-VCMPPSYrmi 2102
-VCMPPSYrri 2103
-VCMPPSZ 2104
-VCMPPSZrmbi 2105
-VCMPPSZrmbik 2106
-VCMPPSZrmi 2107
-VCMPPSZrmik 2108
-VCMPPSZrri 2109
-VCMPPSZrrib 2110
-VCMPPSZrribk 2111
-VCMPPSZrrik 2112
-VCMPPSrmi 2113
-VCMPPSrri 2114
-VCMPSDZrmi 2115
-VCMPSDZrmi_Int 2116
-VCMPSDZrmik_Int 2117
-VCMPSDZrri 2118
-VCMPSDZrri_Int 2119
-VCMPSDZrrib_Int 2120
-VCMPSDZrribk_Int 2121
-VCMPSDZrrik_Int 2122
-VCMPSDrmi 2123
-VCMPSDrmi_Int 2124
-VCMPSDrri 2125
-VCMPSDrri_Int 2126
-VCMPSHZrmi 2127
-VCMPSHZrmi_Int 2128
-VCMPSHZrmik_Int 2129
-VCMPSHZrri 2130
-VCMPSHZrri_Int 2131
-VCMPSHZrrib_Int 2132
-VCMPSHZrribk_Int 2133
-VCMPSHZrrik_Int 2134
-VCMPSSZrmi 2135
-VCMPSSZrmi_Int 2136
-VCMPSSZrmik_Int 2137
-VCMPSSZrri 2138
-VCMPSSZrri_Int 2139
-VCMPSSZrrib_Int 2140
-VCMPSSZrribk_Int 2141
-VCMPSSZrrik_Int 2142
-VCMPSSrmi 2143
-VCMPSSrmi_Int 2144
-VCMPSSrri 2145
-VCMPSSrri_Int 2146
-VCOMISBF 2147
-VCOMISDZrm 2148
-VCOMISDZrm_Int 2149
-VCOMISDZrr 2150
-VCOMISDZrr_Int 2151
-VCOMISDZrrb 2152
-VCOMISDrm 2153
-VCOMISDrm_Int 2154
-VCOMISDrr 2155
-VCOMISDrr_Int 2156
-VCOMISHZrm 2157
-VCOMISHZrm_Int 2158
-VCOMISHZrr 2159
-VCOMISHZrr_Int 2160
-VCOMISHZrrb 2161
-VCOMISSZrm 2162
-VCOMISSZrm_Int 2163
-VCOMISSZrr 2164
-VCOMISSZrr_Int 2165
-VCOMISSZrrb 2166
-VCOMISSrm 2167
-VCOMISSrm_Int 2168
-VCOMISSrr 2169
-VCOMISSrr_Int 2170
-VCOMPRESSPDZ 2171
-VCOMPRESSPDZmr 2172
-VCOMPRESSPDZmrk 2173
-VCOMPRESSPDZrr 2174
-VCOMPRESSPDZrrk 2175
-VCOMPRESSPDZrrkz 2176
-VCOMPRESSPSZ 2177
-VCOMPRESSPSZmr 2178
-VCOMPRESSPSZmrk 2179
-VCOMPRESSPSZrr 2180
-VCOMPRESSPSZrrk 2181
-VCOMPRESSPSZrrkz 2182
-VCOMXSDZrm_Int 2183
-VCOMXSDZrr_Int 2184
-VCOMXSDZrrb_Int 2185
-VCOMXSHZrm_Int 2186
-VCOMXSHZrr_Int 2187
-VCOMXSHZrrb_Int 2188
-VCOMXSSZrm_Int 2189
-VCOMXSSZrr_Int 2190
-VCOMXSSZrrb_Int 2191
-VCVT 2192
-VCVTBF 2193
-VCVTBIASPH 2194
-VCVTDQ 2195
-VCVTHF 2196
-VCVTNE 2197
-VCVTNEEBF 2198
-VCVTNEEPH 2199
-VCVTNEOBF 2200
-VCVTNEOPH 2201
-VCVTNEPS 2202
-VCVTPD 2203
-VCVTPH 2204
-VCVTPS 2205
-VCVTQQ 2206
-VCVTSD 2207
-VCVTSH 2208
-VCVTSI 2209
-VCVTSS 2210
-VCVTTBF 2211
-VCVTTPD 2212
-VCVTTPH 2213
-VCVTTPS 2214
-VCVTTSD 2215
-VCVTTSH 2216
-VCVTTSS 2217
-VCVTUDQ 2218
-VCVTUQQ 2219
-VCVTUSI 2220
-VCVTUW 2221
-VCVTW 2222
-VDBPSADBWZ 2223
-VDBPSADBWZrmi 2224
-VDBPSADBWZrmik 2225
-VDBPSADBWZrmikz 2226
-VDBPSADBWZrri 2227
-VDBPSADBWZrrik 2228
-VDBPSADBWZrrikz 2229
-VDIVBF 2230
-VDIVPDYrm 2231
-VDIVPDYrr 2232
-VDIVPDZ 2233
-VDIVPDZrm 2234
-VDIVPDZrmb 2235
-VDIVPDZrmbk 2236
-VDIVPDZrmbkz 2237
-VDIVPDZrmk 2238
-VDIVPDZrmkz 2239
-VDIVPDZrr 2240
-VDIVPDZrrb 2241
-VDIVPDZrrbk 2242
-VDIVPDZrrbkz 2243
-VDIVPDZrrk 2244
-VDIVPDZrrkz 2245
-VDIVPDrm 2246
-VDIVPDrr 2247
-VDIVPHZ 2248
-VDIVPHZrm 2249
-VDIVPHZrmb 2250
-VDIVPHZrmbk 2251
-VDIVPHZrmbkz 2252
-VDIVPHZrmk 2253
-VDIVPHZrmkz 2254
-VDIVPHZrr 2255
-VDIVPHZrrb 2256
-VDIVPHZrrbk 2257
-VDIVPHZrrbkz 2258
-VDIVPHZrrk 2259
-VDIVPHZrrkz 2260
-VDIVPSYrm 2261
-VDIVPSYrr 2262
-VDIVPSZ 2263
-VDIVPSZrm 2264
-VDIVPSZrmb 2265
-VDIVPSZrmbk 2266
-VDIVPSZrmbkz 2267
-VDIVPSZrmk 2268
-VDIVPSZrmkz 2269
-VDIVPSZrr 2270
-VDIVPSZrrb 2271
-VDIVPSZrrbk 2272
-VDIVPSZrrbkz 2273
-VDIVPSZrrk 2274
-VDIVPSZrrkz 2275
-VDIVPSrm 2276
-VDIVPSrr 2277
-VDIVSDZrm 2278
-VDIVSDZrm_Int 2279
-VDIVSDZrmk_Int 2280
-VDIVSDZrmkz_Int 2281
-VDIVSDZrr 2282
-VDIVSDZrr_Int 2283
-VDIVSDZrrb_Int 2284
-VDIVSDZrrbk_Int 2285
-VDIVSDZrrbkz_Int 2286
-VDIVSDZrrk_Int 2287
-VDIVSDZrrkz_Int 2288
-VDIVSDrm 2289
-VDIVSDrm_Int 2290
-VDIVSDrr 2291
-VDIVSDrr_Int 2292
-VDIVSHZrm 2293
-VDIVSHZrm_Int 2294
-VDIVSHZrmk_Int 2295
-VDIVSHZrmkz_Int 2296
-VDIVSHZrr 2297
-VDIVSHZrr_Int 2298
-VDIVSHZrrb_Int 2299
-VDIVSHZrrbk_Int 2300
-VDIVSHZrrbkz_Int 2301
-VDIVSHZrrk_Int 2302
-VDIVSHZrrkz_Int 2303
-VDIVSSZrm 2304
-VDIVSSZrm_Int 2305
-VDIVSSZrmk_Int 2306
-VDIVSSZrmkz_Int 2307
-VDIVSSZrr 2308
-VDIVSSZrr_Int 2309
-VDIVSSZrrb_Int 2310
-VDIVSSZrrbk_Int 2311
-VDIVSSZrrbkz_Int 2312
-VDIVSSZrrk_Int 2313
-VDIVSSZrrkz_Int 2314
-VDIVSSrm 2315
-VDIVSSrm_Int 2316
-VDIVSSrr 2317
-VDIVSSrr_Int 2318
-VDPBF 2319
-VDPPDrmi 2320
-VDPPDrri 2321
-VDPPHPSZ 2322
-VDPPHPSZm 2323
-VDPPHPSZmb 2324
-VDPPHPSZmbk 2325
-VDPPHPSZmbkz 2326
-VDPPHPSZmk 2327
-VDPPHPSZmkz 2328
-VDPPHPSZr 2329
-VDPPHPSZrk 2330
-VDPPHPSZrkz 2331
-VDPPSYrmi 2332
-VDPPSYrri 2333
-VDPPSrmi 2334
-VDPPSrri 2335
-VERRm 2336
-VERRr 2337
-VERWm 2338
-VERWr 2339
-VEXP 2340
-VEXPANDPDZ 2341
-VEXPANDPDZrm 2342
-VEXPANDPDZrmk 2343
-VEXPANDPDZrmkz 2344
-VEXPANDPDZrr 2345
-VEXPANDPDZrrk 2346
-VEXPANDPDZrrkz 2347
-VEXPANDPSZ 2348
-VEXPANDPSZrm 2349
-VEXPANDPSZrmk 2350
-VEXPANDPSZrmkz 2351
-VEXPANDPSZrr 2352
-VEXPANDPSZrrk 2353
-VEXPANDPSZrrkz 2354
-VEXTRACTF 2355
-VEXTRACTI 2356
-VEXTRACTPSZmri 2357
-VEXTRACTPSZrri 2358
-VEXTRACTPSmri 2359
-VEXTRACTPSrri 2360
-VFCMADDCPHZ 2361
-VFCMADDCPHZm 2362
-VFCMADDCPHZmb 2363
-VFCMADDCPHZmbk 2364
-VFCMADDCPHZmbkz 2365
-VFCMADDCPHZmk 2366
-VFCMADDCPHZmkz 2367
-VFCMADDCPHZr 2368
-VFCMADDCPHZrb 2369
-VFCMADDCPHZrbk 2370
-VFCMADDCPHZrbkz 2371
-VFCMADDCPHZrk 2372
-VFCMADDCPHZrkz 2373
-VFCMADDCSHZm 2374
-VFCMADDCSHZmk 2375
-VFCMADDCSHZmkz 2376
-VFCMADDCSHZr 2377
-VFCMADDCSHZrb 2378
-VFCMADDCSHZrbk 2379
-VFCMADDCSHZrbkz 2380
-VFCMADDCSHZrk 2381
-VFCMADDCSHZrkz 2382
-VFCMULCPHZ 2383
-VFCMULCPHZrm 2384
-VFCMULCPHZrmb 2385
-VFCMULCPHZrmbk 2386
-VFCMULCPHZrmbkz 2387
-VFCMULCPHZrmk 2388
-VFCMULCPHZrmkz 2389
-VFCMULCPHZrr 2390
-VFCMULCPHZrrb 2391
-VFCMULCPHZrrbk 2392
-VFCMULCPHZrrbkz 2393
-VFCMULCPHZrrk 2394
-VFCMULCPHZrrkz 2395
-VFCMULCSHZrm 2396
-VFCMULCSHZrmk 2397
-VFCMULCSHZrmkz 2398
-VFCMULCSHZrr 2399
-VFCMULCSHZrrb 2400
-VFCMULCSHZrrbk 2401
-VFCMULCSHZrrbkz 2402
-VFCMULCSHZrrk 2403
-VFCMULCSHZrrkz 2404
-VFIXUPIMMPDZ 2405
-VFIXUPIMMPDZrmbi 2406
-VFIXUPIMMPDZrmbik 2407
-VFIXUPIMMPDZrmbikz 2408
-VFIXUPIMMPDZrmi 2409
-VFIXUPIMMPDZrmik 2410
-VFIXUPIMMPDZrmikz 2411
-VFIXUPIMMPDZrri 2412
-VFIXUPIMMPDZrrib 2413
-VFIXUPIMMPDZrribk 2414
-VFIXUPIMMPDZrribkz 2415
-VFIXUPIMMPDZrrik 2416
-VFIXUPIMMPDZrrikz 2417
-VFIXUPIMMPSZ 2418
-VFIXUPIMMPSZrmbi 2419
-VFIXUPIMMPSZrmbik 2420
-VFIXUPIMMPSZrmbikz 2421
-VFIXUPIMMPSZrmi 2422
-VFIXUPIMMPSZrmik 2423
-VFIXUPIMMPSZrmikz 2424
-VFIXUPIMMPSZrri 2425
-VFIXUPIMMPSZrrib 2426
-VFIXUPIMMPSZrribk 2427
-VFIXUPIMMPSZrribkz 2428
-VFIXUPIMMPSZrrik 2429
-VFIXUPIMMPSZrrikz 2430
-VFIXUPIMMSDZrmi 2431
-VFIXUPIMMSDZrmik 2432
-VFIXUPIMMSDZrmikz 2433
-VFIXUPIMMSDZrri 2434
-VFIXUPIMMSDZrrib 2435
-VFIXUPIMMSDZrribk 2436
-VFIXUPIMMSDZrribkz 2437
-VFIXUPIMMSDZrrik 2438
-VFIXUPIMMSDZrrikz 2439
-VFIXUPIMMSSZrmi 2440
-VFIXUPIMMSSZrmik 2441
-VFIXUPIMMSSZrmikz 2442
-VFIXUPIMMSSZrri 2443
-VFIXUPIMMSSZrrib 2444
-VFIXUPIMMSSZrribk 2445
-VFIXUPIMMSSZrribkz 2446
-VFIXUPIMMSSZrrik 2447
-VFIXUPIMMSSZrrikz 2448
-VFMADD 2449
-VFMADDCPHZ 2450
-VFMADDCPHZm 2451
-VFMADDCPHZmb 2452
-VFMADDCPHZmbk 2453
-VFMADDCPHZmbkz 2454
-VFMADDCPHZmk 2455
-VFMADDCPHZmkz 2456
-VFMADDCPHZr 2457
-VFMADDCPHZrb 2458
-VFMADDCPHZrbk 2459
-VFMADDCPHZrbkz 2460
-VFMADDCPHZrk 2461
-VFMADDCPHZrkz 2462
-VFMADDCSHZm 2463
-VFMADDCSHZmk 2464
-VFMADDCSHZmkz 2465
-VFMADDCSHZr 2466
-VFMADDCSHZrb 2467
-VFMADDCSHZrbk 2468
-VFMADDCSHZrbkz 2469
-VFMADDCSHZrk 2470
-VFMADDCSHZrkz 2471
-VFMADDPD 2472
-VFMADDPS 2473
-VFMADDSD 2474
-VFMADDSS 2475
-VFMADDSUB 2476
-VFMADDSUBPD 2477
-VFMADDSUBPS 2478
-VFMSUB 2479
-VFMSUBADD 2480
-VFMSUBADDPD 2481
-VFMSUBADDPS 2482
-VFMSUBPD 2483
-VFMSUBPS 2484
-VFMSUBSD 2485
-VFMSUBSS 2486
-VFMULCPHZ 2487
-VFMULCPHZrm 2488
-VFMULCPHZrmb 2489
-VFMULCPHZrmbk 2490
-VFMULCPHZrmbkz 2491
-VFMULCPHZrmk 2492
-VFMULCPHZrmkz 2493
-VFMULCPHZrr 2494
-VFMULCPHZrrb 2495
-VFMULCPHZrrbk 2496
-VFMULCPHZrrbkz 2497
-VFMULCPHZrrk 2498
-VFMULCPHZrrkz 2499
-VFMULCSHZrm 2500
-VFMULCSHZrmk 2501
-VFMULCSHZrmkz 2502
-VFMULCSHZrr 2503
-VFMULCSHZrrb 2504
-VFMULCSHZrrbk 2505
-VFMULCSHZrrbkz 2506
-VFMULCSHZrrk 2507
-VFMULCSHZrrkz 2508
-VFNMADD 2509
-VFNMADDPD 2510
-VFNMADDPS 2511
-VFNMADDSD 2512
-VFNMADDSS 2513
-VFNMSUB 2514
-VFNMSUBPD 2515
-VFNMSUBPS 2516
-VFNMSUBSD 2517
-VFNMSUBSS 2518
-VFPCLASSBF 2519
-VFPCLASSPDZ 2520
-VFPCLASSPDZmbi 2521
-VFPCLASSPDZmbik 2522
-VFPCLASSPDZmi 2523
-VFPCLASSPDZmik 2524
-VFPCLASSPDZri 2525
-VFPCLASSPDZrik 2526
-VFPCLASSPHZ 2527
-VFPCLASSPHZmbi 2528
-VFPCLASSPHZmbik 2529
-VFPCLASSPHZmi 2530
-VFPCLASSPHZmik 2531
-VFPCLASSPHZri 2532
-VFPCLASSPHZrik 2533
-VFPCLASSPSZ 2534
-VFPCLASSPSZmbi 2535
-VFPCLASSPSZmbik 2536
-VFPCLASSPSZmi 2537
-VFPCLASSPSZmik 2538
-VFPCLASSPSZri 2539
-VFPCLASSPSZrik 2540
-VFPCLASSSDZmi 2541
-VFPCLASSSDZmik 2542
-VFPCLASSSDZri 2543
-VFPCLASSSDZrik 2544
-VFPCLASSSHZmi 2545
-VFPCLASSSHZmik 2546
-VFPCLASSSHZri 2547
-VFPCLASSSHZrik 2548
-VFPCLASSSSZmi 2549
-VFPCLASSSSZmik 2550
-VFPCLASSSSZri 2551
-VFPCLASSSSZrik 2552
-VFRCZPDYrm 2553
-VFRCZPDYrr 2554
-VFRCZPDrm 2555
-VFRCZPDrr 2556
-VFRCZPSYrm 2557
-VFRCZPSYrr 2558
-VFRCZPSrm 2559
-VFRCZPSrr 2560
-VFRCZSDrm 2561
-VFRCZSDrr 2562
-VFRCZSSrm 2563
-VFRCZSSrr 2564
-VGATHERDPDYrm 2565
-VGATHERDPDZ 2566
-VGATHERDPDZrm 2567
-VGATHERDPDrm 2568
-VGATHERDPSYrm 2569
-VGATHERDPSZ 2570
-VGATHERDPSZrm 2571
-VGATHERDPSrm 2572
-VGATHERPF 2573
-VGATHERQPDYrm 2574
-VGATHERQPDZ 2575
-VGATHERQPDZrm 2576
-VGATHERQPDrm 2577
-VGATHERQPSYrm 2578
-VGATHERQPSZ 2579
-VGATHERQPSZrm 2580
-VGATHERQPSrm 2581
-VGETEXPBF 2582
-VGETEXPPDZ 2583
-VGETEXPPDZm 2584
-VGETEXPPDZmb 2585
-VGETEXPPDZmbk 2586
-VGETEXPPDZmbkz 2587
-VGETEXPPDZmk 2588
-VGETEXPPDZmkz 2589
-VGETEXPPDZr 2590
-VGETEXPPDZrb 2591
-VGETEXPPDZrbk 2592
-VGETEXPPDZrbkz 2593
-VGETEXPPDZrk 2594
-VGETEXPPDZrkz 2595
-VGETEXPPHZ 2596
-VGETEXPPHZm 2597
-VGETEXPPHZmb 2598
-VGETEXPPHZmbk 2599
-VGETEXPPHZmbkz 2600
-VGETEXPPHZmk 2601
-VGETEXPPHZmkz 2602
-VGETEXPPHZr 2603
-VGETEXPPHZrb 2604
-VGETEXPPHZrbk 2605
-VGETEXPPHZrbkz 2606
-VGETEXPPHZrk 2607
-VGETEXPPHZrkz 2608
-VGETEXPPSZ 2609
-VGETEXPPSZm 2610
-VGETEXPPSZmb 2611
-VGETEXPPSZmbk 2612
-VGETEXPPSZmbkz 2613
-VGETEXPPSZmk 2614
-VGETEXPPSZmkz 2615
-VGETEXPPSZr 2616
-VGETEXPPSZrb 2617
-VGETEXPPSZrbk 2618
-VGETEXPPSZrbkz 2619
-VGETEXPPSZrk 2620
-VGETEXPPSZrkz 2621
-VGETEXPSDZm 2622
-VGETEXPSDZmk 2623
-VGETEXPSDZmkz 2624
-VGETEXPSDZr 2625
-VGETEXPSDZrb 2626
-VGETEXPSDZrbk 2627
-VGETEXPSDZrbkz 2628
-VGETEXPSDZrk 2629
-VGETEXPSDZrkz 2630
-VGETEXPSHZm 2631
-VGETEXPSHZmk 2632
-VGETEXPSHZmkz 2633
-VGETEXPSHZr 2634
-VGETEXPSHZrb 2635
-VGETEXPSHZrbk 2636
-VGETEXPSHZrbkz 2637
-VGETEXPSHZrk 2638
-VGETEXPSHZrkz 2639
-VGETEXPSSZm 2640
-VGETEXPSSZmk 2641
-VGETEXPSSZmkz 2642
-VGETEXPSSZr 2643
-VGETEXPSSZrb 2644
-VGETEXPSSZrbk 2645
-VGETEXPSSZrbkz 2646
-VGETEXPSSZrk 2647
-VGETEXPSSZrkz 2648
-VGETMANTBF 2649
-VGETMANTPDZ 2650
-VGETMANTPDZrmbi 2651
-VGETMANTPDZrmbik 2652
-VGETMANTPDZrmbikz 2653
-VGETMANTPDZrmi 2654
-VGETMANTPDZrmik 2655
-VGETMANTPDZrmikz 2656
-VGETMANTPDZrri 2657
-VGETMANTPDZrrib 2658
-VGETMANTPDZrribk 2659
-VGETMANTPDZrribkz 2660
-VGETMANTPDZrrik 2661
-VGETMANTPDZrrikz 2662
-VGETMANTPHZ 2663
-VGETMANTPHZrmbi 2664
-VGETMANTPHZrmbik 2665
-VGETMANTPHZrmbikz 2666
-VGETMANTPHZrmi 2667
-VGETMANTPHZrmik 2668
-VGETMANTPHZrmikz 2669
-VGETMANTPHZrri 2670
-VGETMANTPHZrrib 2671
-VGETMANTPHZrribk 2672
-VGETMANTPHZrribkz 2673
-VGETMANTPHZrrik 2674
-VGETMANTPHZrrikz 2675
-VGETMANTPSZ 2676
-VGETMANTPSZrmbi 2677
-VGETMANTPSZrmbik 2678
-VGETMANTPSZrmbikz 2679
-VGETMANTPSZrmi 2680
-VGETMANTPSZrmik 2681
-VGETMANTPSZrmikz 2682
-VGETMANTPSZrri 2683
-VGETMANTPSZrrib 2684
-VGETMANTPSZrribk 2685
-VGETMANTPSZrribkz 2686
-VGETMANTPSZrrik 2687
-VGETMANTPSZrrikz 2688
-VGETMANTSDZrmi 2689
-VGETMANTSDZrmik 2690
-VGETMANTSDZrmikz 2691
-VGETMANTSDZrri 2692
-VGETMANTSDZrrib 2693
-VGETMANTSDZrribk 2694
-VGETMANTSDZrribkz 2695
-VGETMANTSDZrrik 2696
-VGETMANTSDZrrikz 2697
-VGETMANTSHZrmi 2698
-VGETMANTSHZrmik 2699
-VGETMANTSHZrmikz 2700
-VGETMANTSHZrri 2701
-VGETMANTSHZrrib 2702
-VGETMANTSHZrribk 2703
-VGETMANTSHZrribkz 2704
-VGETMANTSHZrrik 2705
-VGETMANTSHZrrikz 2706
-VGETMANTSSZrmi 2707
-VGETMANTSSZrmik 2708
-VGETMANTSSZrmikz 2709
-VGETMANTSSZrri 2710
-VGETMANTSSZrrib 2711
-VGETMANTSSZrribk 2712
-VGETMANTSSZrribkz 2713
-VGETMANTSSZrrik 2714
-VGETMANTSSZrrikz 2715
-VGF 2716
-VHADDPDYrm 2717
-VHADDPDYrr 2718
-VHADDPDrm 2719
-VHADDPDrr 2720
-VHADDPSYrm 2721
-VHADDPSYrr 2722
-VHADDPSrm 2723
-VHADDPSrr 2724
-VHSUBPDYrm 2725
-VHSUBPDYrr 2726
-VHSUBPDrm 2727
-VHSUBPDrr 2728
-VHSUBPSYrm 2729
-VHSUBPSYrr 2730
-VHSUBPSrm 2731
-VHSUBPSrr 2732
-VINSERTF 2733
-VINSERTI 2734
-VINSERTPSZrmi 2735
-VINSERTPSZrri 2736
-VINSERTPSrmi 2737
-VINSERTPSrri 2738
-VLDDQUYrm 2739
-VLDDQUrm 2740
-VLDMXCSR 2741
-VMASKMOVDQU 2742
-VMASKMOVPDYmr 2743
-VMASKMOVPDYrm 2744
-VMASKMOVPDmr 2745
-VMASKMOVPDrm 2746
-VMASKMOVPSYmr 2747
-VMASKMOVPSYrm 2748
-VMASKMOVPSmr 2749
-VMASKMOVPSrm 2750
-VMAXBF 2751
-VMAXCPDYrm 2752
-VMAXCPDYrr 2753
-VMAXCPDZ 2754
-VMAXCPDZrm 2755
-VMAXCPDZrmb 2756
-VMAXCPDZrmbk 2757
-VMAXCPDZrmbkz 2758
-VMAXCPDZrmk 2759
-VMAXCPDZrmkz 2760
-VMAXCPDZrr 2761
-VMAXCPDZrrk 2762
-VMAXCPDZrrkz 2763
-VMAXCPDrm 2764
-VMAXCPDrr 2765
-VMAXCPHZ 2766
-VMAXCPHZrm 2767
-VMAXCPHZrmb 2768
-VMAXCPHZrmbk 2769
-VMAXCPHZrmbkz 2770
-VMAXCPHZrmk 2771
-VMAXCPHZrmkz 2772
-VMAXCPHZrr 2773
-VMAXCPHZrrk 2774
-VMAXCPHZrrkz 2775
-VMAXCPSYrm 2776
-VMAXCPSYrr 2777
-VMAXCPSZ 2778
-VMAXCPSZrm 2779
-VMAXCPSZrmb 2780
-VMAXCPSZrmbk 2781
-VMAXCPSZrmbkz 2782
-VMAXCPSZrmk 2783
-VMAXCPSZrmkz 2784
-VMAXCPSZrr 2785
-VMAXCPSZrrk 2786
-VMAXCPSZrrkz 2787
-VMAXCPSrm 2788
-VMAXCPSrr 2789
-VMAXCSDZrm 2790
-VMAXCSDZrr 2791
-VMAXCSDrm 2792
-VMAXCSDrr 2793
-VMAXCSHZrm 2794
-VMAXCSHZrr 2795
-VMAXCSSZrm 2796
-VMAXCSSZrr 2797
-VMAXCSSrm 2798
-VMAXCSSrr 2799
-VMAXPDYrm 2800
-VMAXPDYrr 2801
-VMAXPDZ 2802
-VMAXPDZrm 2803
-VMAXPDZrmb 2804
-VMAXPDZrmbk 2805
-VMAXPDZrmbkz 2806
-VMAXPDZrmk 2807
-VMAXPDZrmkz 2808
-VMAXPDZrr 2809
-VMAXPDZrrb 2810
-VMAXPDZrrbk 2811
-VMAXPDZrrbkz 2812
-VMAXPDZrrk 2813
-VMAXPDZrrkz 2814
-VMAXPDrm 2815
-VMAXPDrr 2816
-VMAXPHZ 2817
-VMAXPHZrm 2818
-VMAXPHZrmb 2819
-VMAXPHZrmbk 2820
-VMAXPHZrmbkz 2821
-VMAXPHZrmk 2822
-VMAXPHZrmkz 2823
-VMAXPHZrr 2824
-VMAXPHZrrb 2825
-VMAXPHZrrbk 2826
-VMAXPHZrrbkz 2827
-VMAXPHZrrk 2828
-VMAXPHZrrkz 2829
-VMAXPSYrm 2830
-VMAXPSYrr 2831
-VMAXPSZ 2832
-VMAXPSZrm 2833
-VMAXPSZrmb 2834
-VMAXPSZrmbk 2835
-VMAXPSZrmbkz 2836
-VMAXPSZrmk 2837
-VMAXPSZrmkz 2838
-VMAXPSZrr 2839
-VMAXPSZrrb 2840
-VMAXPSZrrbk 2841
-VMAXPSZrrbkz 2842
-VMAXPSZrrk 2843
-VMAXPSZrrkz 2844
-VMAXPSrm 2845
-VMAXPSrr 2846
-VMAXSDZrm 2847
-VMAXSDZrm_Int 2848
-VMAXSDZrmk_Int 2849
-VMAXSDZrmkz_Int 2850
-VMAXSDZrr 2851
-VMAXSDZrr_Int 2852
-VMAXSDZrrb_Int 2853
-VMAXSDZrrbk_Int 2854
-VMAXSDZrrbkz_Int 2855
-VMAXSDZrrk_Int 2856
-VMAXSDZrrkz_Int 2857
-VMAXSDrm 2858
-VMAXSDrm_Int 2859
-VMAXSDrr 2860
-VMAXSDrr_Int 2861
-VMAXSHZrm 2862
-VMAXSHZrm_Int 2863
-VMAXSHZrmk_Int 2864
-VMAXSHZrmkz_Int 2865
-VMAXSHZrr 2866
-VMAXSHZrr_Int 2867
-VMAXSHZrrb_Int 2868
-VMAXSHZrrbk_Int 2869
-VMAXSHZrrbkz_Int 2870
-VMAXSHZrrk_Int 2871
-VMAXSHZrrkz_Int 2872
-VMAXSSZrm 2873
-VMAXSSZrm_Int 2874
-VMAXSSZrmk_Int 2875
-VMAXSSZrmkz_Int 2876
-VMAXSSZrr 2877
-VMAXSSZrr_Int 2878
-VMAXSSZrrb_Int 2879
-VMAXSSZrrbk_Int 2880
-VMAXSSZrrbkz_Int 2881
-VMAXSSZrrk_Int 2882
-VMAXSSZrrkz_Int 2883
-VMAXSSrm 2884
-VMAXSSrm_Int 2885
-VMAXSSrr 2886
-VMAXSSrr_Int 2887
-VMCALL 2888
-VMCLEARm 2889
-VMFUNC 2890
-VMINBF 2891
-VMINCPDYrm 2892
-VMINCPDYrr 2893
-VMINCPDZ 2894
-VMINCPDZrm 2895
-VMINCPDZrmb 2896
-VMINCPDZrmbk 2897
-VMINCPDZrmbkz 2898
-VMINCPDZrmk 2899
-VMINCPDZrmkz 2900
-VMINCPDZrr 2901
-VMINCPDZrrk 2902
-VMINCPDZrrkz 2903
-VMINCPDrm 2904
-VMINCPDrr 2905
-VMINCPHZ 2906
-VMINCPHZrm 2907
-VMINCPHZrmb 2908
-VMINCPHZrmbk 2909
-VMINCPHZrmbkz 2910
-VMINCPHZrmk 2911
-VMINCPHZrmkz 2912
-VMINCPHZrr 2913
-VMINCPHZrrk 2914
-VMINCPHZrrkz 2915
-VMINCPSYrm 2916
-VMINCPSYrr 2917
-VMINCPSZ 2918
-VMINCPSZrm 2919
-VMINCPSZrmb 2920
-VMINCPSZrmbk 2921
-VMINCPSZrmbkz 2922
-VMINCPSZrmk 2923
-VMINCPSZrmkz 2924
-VMINCPSZrr 2925
-VMINCPSZrrk 2926
-VMINCPSZrrkz 2927
-VMINCPSrm 2928
-VMINCPSrr 2929
-VMINCSDZrm 2930
-VMINCSDZrr 2931
-VMINCSDrm 2932
-VMINCSDrr 2933
-VMINCSHZrm 2934
-VMINCSHZrr 2935
-VMINCSSZrm 2936
-VMINCSSZrr 2937
-VMINCSSrm 2938
-VMINCSSrr 2939
-VMINMAXBF 2940
-VMINMAXPDZ 2941
-VMINMAXPDZrmbi 2942
-VMINMAXPDZrmbik 2943
-VMINMAXPDZrmbikz 2944
-VMINMAXPDZrmi 2945
-VMINMAXPDZrmik 2946
-VMINMAXPDZrmikz 2947
-VMINMAXPDZrri 2948
-VMINMAXPDZrrib 2949
-VMINMAXPDZrribk 2950
-VMINMAXPDZrribkz 2951
-VMINMAXPDZrrik 2952
-VMINMAXPDZrrikz 2953
-VMINMAXPHZ 2954
-VMINMAXPHZrmbi 2955
-VMINMAXPHZrmbik 2956
-VMINMAXPHZrmbikz 2957
-VMINMAXPHZrmi 2958
-VMINMAXPHZrmik 2959
-VMINMAXPHZrmikz 2960
-VMINMAXPHZrri 2961
-VMINMAXPHZrrib 2962
-VMINMAXPHZrribk 2963
-VMINMAXPHZrribkz 2964
-VMINMAXPHZrrik 2965
-VMINMAXPHZrrikz 2966
-VMINMAXPSZ 2967
-VMINMAXPSZrmbi 2968
-VMINMAXPSZrmbik 2969
-VMINMAXPSZrmbikz 2970
-VMINMAXPSZrmi 2971
-VMINMAXPSZrmik 2972
-VMINMAXPSZrmikz 2973
-VMINMAXPSZrri 2974
-VMINMAXPSZrrib 2975
-VMINMAXPSZrribk 2976
-VMINMAXPSZrribkz 2977
-VMINMAXPSZrrik 2978
-VMINMAXPSZrrikz 2979
-VMINMAXSDrmi 2980
-VMINMAXSDrmi_Int 2981
-VMINMAXSDrmik_Int 2982
-VMINMAXSDrmikz_Int 2983
-VMINMAXSDrri 2984
-VMINMAXSDrri_Int 2985
-VMINMAXSDrrib_Int 2986
-VMINMAXSDrribk_Int 2987
-VMINMAXSDrribkz_Int 2988
-VMINMAXSDrrik_Int 2989
-VMINMAXSDrrikz_Int 2990
-VMINMAXSHrmi 2991
-VMINMAXSHrmi_Int 2992
-VMINMAXSHrmik_Int 2993
-VMINMAXSHrmikz_Int 2994
-VMINMAXSHrri 2995
-VMINMAXSHrri_Int 2996
-VMINMAXSHrrib_Int 2997
-VMINMAXSHrribk_Int 2998
-VMINMAXSHrribkz_Int 2999
-VMINMAXSHrrik_Int 3000
-VMINMAXSHrrikz_Int 3001
-VMINMAXSSrmi 3002
-VMINMAXSSrmi_Int 3003
-VMINMAXSSrmik_Int 3004
-VMINMAXSSrmikz_Int 3005
-VMINMAXSSrri 3006
-VMINMAXSSrri_Int 3007
-VMINMAXSSrrib_Int 3008
-VMINMAXSSrribk_Int 3009
-VMINMAXSSrribkz_Int 3010
-VMINMAXSSrrik_Int 3011
-VMINMAXSSrrikz_Int 3012
-VMINPDYrm 3013
-VMINPDYrr 3014
-VMINPDZ 3015
-VMINPDZrm 3016
-VMINPDZrmb 3017
-VMINPDZrmbk 3018
-VMINPDZrmbkz 3019
-VMINPDZrmk 3020
-VMINPDZrmkz 3021
-VMINPDZrr 3022
-VMINPDZrrb 3023
-VMINPDZrrbk 3024
-VMINPDZrrbkz 3025
-VMINPDZrrk 3026
-VMINPDZrrkz 3027
-VMINPDrm 3028
-VMINPDrr 3029
-VMINPHZ 3030
-VMINPHZrm 3031
-VMINPHZrmb 3032
-VMINPHZrmbk 3033
-VMINPHZrmbkz 3034
-VMINPHZrmk 3035
-VMINPHZrmkz 3036
-VMINPHZrr 3037
-VMINPHZrrb 3038
-VMINPHZrrbk 3039
-VMINPHZrrbkz 3040
-VMINPHZrrk 3041
-VMINPHZrrkz 3042
-VMINPSYrm 3043
-VMINPSYrr 3044
-VMINPSZ 3045
-VMINPSZrm 3046
-VMINPSZrmb 3047
-VMINPSZrmbk 3048
-VMINPSZrmbkz 3049
-VMINPSZrmk 3050
-VMINPSZrmkz 3051
-VMINPSZrr 3052
-VMINPSZrrb 3053
-VMINPSZrrbk 3054
-VMINPSZrrbkz 3055
-VMINPSZrrk 3056
-VMINPSZrrkz 3057
-VMINPSrm 3058
-VMINPSrr 3059
-VMINSDZrm 3060
-VMINSDZrm_Int 3061
-VMINSDZrmk_Int 3062
-VMINSDZrmkz_Int 3063
-VMINSDZrr 3064
-VMINSDZrr_Int 3065
-VMINSDZrrb_Int 3066
-VMINSDZrrbk_Int 3067
-VMINSDZrrbkz_Int 3068
-VMINSDZrrk_Int 3069
-VMINSDZrrkz_Int 3070
-VMINSDrm 3071
-VMINSDrm_Int 3072
-VMINSDrr 3073
-VMINSDrr_Int 3074
-VMINSHZrm 3075
-VMINSHZrm_Int 3076
-VMINSHZrmk_Int 3077
-VMINSHZrmkz_Int 3078
-VMINSHZrr 3079
-VMINSHZrr_Int 3080
-VMINSHZrrb_Int 3081
-VMINSHZrrbk_Int 3082
-VMINSHZrrbkz_Int 3083
-VMINSHZrrk_Int 3084
-VMINSHZrrkz_Int 3085
-VMINSSZrm 3086
-VMINSSZrm_Int 3087
-VMINSSZrmk_Int 3088
-VMINSSZrmkz_Int 3089
-VMINSSZrr 3090
-VMINSSZrr_Int 3091
-VMINSSZrrb_Int 3092
-VMINSSZrrbk_Int 3093
-VMINSSZrrbkz_Int 3094
-VMINSSZrrk_Int 3095
-VMINSSZrrkz_Int 3096
-VMINSSrm 3097
-VMINSSrm_Int 3098
-VMINSSrr 3099
-VMINSSrr_Int 3100
-VMLAUNCH 3101
-VMLOAD 3102
-VMMCALL 3103
-VMOV 3104
-VMOVAPDYmr 3105
-VMOVAPDYrm 3106
-VMOVAPDYrr 3107
-VMOVAPDYrr_REV 3108
-VMOVAPDZ 3109
-VMOVAPDZmr 3110
-VMOVAPDZmrk 3111
-VMOVAPDZrm 3112
-VMOVAPDZrmk 3113
-VMOVAPDZrmkz 3114
-VMOVAPDZrr 3115
-VMOVAPDZrr_REV 3116
-VMOVAPDZrrk 3117
-VMOVAPDZrrk_REV 3118
-VMOVAPDZrrkz 3119
-VMOVAPDZrrkz_REV 3120
-VMOVAPDmr 3121
-VMOVAPDrm 3122
-VMOVAPDrr 3123
-VMOVAPDrr_REV 3124
-VMOVAPSYmr 3125
-VMOVAPSYrm 3126
-VMOVAPSYrr 3127
-VMOVAPSYrr_REV 3128
-VMOVAPSZ 3129
-VMOVAPSZmr 3130
-VMOVAPSZmrk 3131
-VMOVAPSZrm 3132
-VMOVAPSZrmk 3133
-VMOVAPSZrmkz 3134
-VMOVAPSZrr 3135
-VMOVAPSZrr_REV 3136
-VMOVAPSZrrk 3137
-VMOVAPSZrrk_REV 3138
-VMOVAPSZrrkz 3139
-VMOVAPSZrrkz_REV 3140
-VMOVAPSmr 3141
-VMOVAPSrm 3142
-VMOVAPSrr 3143
-VMOVAPSrr_REV 3144
-VMOVDDUPYrm 3145
-VMOVDDUPYrr 3146
-VMOVDDUPZ 3147
-VMOVDDUPZrm 3148
-VMOVDDUPZrmk 3149
-VMOVDDUPZrmkz 3150
-VMOVDDUPZrr 3151
-VMOVDDUPZrrk 3152
-VMOVDDUPZrrkz 3153
-VMOVDDUPrm 3154
-VMOVDDUPrr 3155
-VMOVDI 3156
-VMOVDQA 3157
-VMOVDQAYmr 3158
-VMOVDQAYrm 3159
-VMOVDQAYrr 3160
-VMOVDQAYrr_REV 3161
-VMOVDQAmr 3162
-VMOVDQArm 3163
-VMOVDQArr 3164
-VMOVDQArr_REV 3165
-VMOVDQU 3166
-VMOVDQUYmr 3167
-VMOVDQUYrm 3168
-VMOVDQUYrr 3169
-VMOVDQUYrr_REV 3170
-VMOVDQUmr 3171
-VMOVDQUrm 3172
-VMOVDQUrr 3173
-VMOVDQUrr_REV 3174
-VMOVHLPSZrr 3175
-VMOVHLPSrr 3176
-VMOVHPDZ 3177
-VMOVHPDmr 3178
-VMOVHPDrm 3179
-VMOVHPSZ 3180
-VMOVHPSmr 3181
-VMOVHPSrm 3182
-VMOVLHPSZrr 3183
-VMOVLHPSrr 3184
-VMOVLPDZ 3185
-VMOVLPDmr 3186
-VMOVLPDrm 3187
-VMOVLPSZ 3188
-VMOVLPSmr 3189
-VMOVLPSrm 3190
-VMOVMSKPDYrr 3191
-VMOVMSKPDrr 3192
-VMOVMSKPSYrr 3193
-VMOVMSKPSrr 3194
-VMOVNTDQAYrm 3195
-VMOVNTDQAZ 3196
-VMOVNTDQAZrm 3197
-VMOVNTDQArm 3198
-VMOVNTDQYmr 3199
-VMOVNTDQZ 3200
-VMOVNTDQZmr 3201
-VMOVNTDQmr 3202
-VMOVNTPDYmr 3203
-VMOVNTPDZ 3204
-VMOVNTPDZmr 3205
-VMOVNTPDmr 3206
-VMOVNTPSYmr 3207
-VMOVNTPSZ 3208
-VMOVNTPSZmr 3209
-VMOVNTPSmr 3210
-VMOVPDI 3211
-VMOVPQI 3212
-VMOVPQIto 3213
-VMOVQI 3214
-VMOVRSBZ 3215
-VMOVRSBZm 3216
-VMOVRSBZmk 3217
-VMOVRSBZmkz 3218
-VMOVRSDZ 3219
-VMOVRSDZm 3220
-VMOVRSDZmk 3221
-VMOVRSDZmkz 3222
-VMOVRSQZ 3223
-VMOVRSQZm 3224
-VMOVRSQZmk 3225
-VMOVRSQZmkz 3226
-VMOVRSWZ 3227
-VMOVRSWZm 3228
-VMOVRSWZmk 3229
-VMOVRSWZmkz 3230
-VMOVSDZmr 3231
-VMOVSDZmrk 3232
-VMOVSDZrm 3233
-VMOVSDZrm_alt 3234
-VMOVSDZrmk 3235
-VMOVSDZrmkz 3236
-VMOVSDZrr 3237
-VMOVSDZrr_REV 3238
-VMOVSDZrrk 3239
-VMOVSDZrrk_REV 3240
-VMOVSDZrrkz 3241
-VMOVSDZrrkz_REV 3242
-VMOVSDmr 3243
-VMOVSDrm 3244
-VMOVSDrm_alt 3245
-VMOVSDrr 3246
-VMOVSDrr_REV 3247
-VMOVSDto 3248
-VMOVSH 3249
-VMOVSHDUPYrm 3250
-VMOVSHDUPYrr 3251
-VMOVSHDUPZ 3252
-VMOVSHDUPZrm 3253
-VMOVSHDUPZrmk 3254
-VMOVSHDUPZrmkz 3255
-VMOVSHDUPZrr 3256
-VMOVSHDUPZrrk 3257
-VMOVSHDUPZrrkz 3258
-VMOVSHDUPrm 3259
-VMOVSHDUPrr 3260
-VMOVSHZmr 3261
-VMOVSHZmrk 3262
-VMOVSHZrm 3263
-VMOVSHZrm_alt 3264
-VMOVSHZrmk 3265
-VMOVSHZrmkz 3266
-VMOVSHZrr 3267
-VMOVSHZrr_REV 3268
-VMOVSHZrrk 3269
-VMOVSHZrrk_REV 3270
-VMOVSHZrrkz 3271
-VMOVSHZrrkz_REV 3272
-VMOVSHtoW 3273
-VMOVSLDUPYrm 3274
-VMOVSLDUPYrr 3275
-VMOVSLDUPZ 3276
-VMOVSLDUPZrm 3277
-VMOVSLDUPZrmk 3278
-VMOVSLDUPZrmkz 3279
-VMOVSLDUPZrr 3280
-VMOVSLDUPZrrk 3281
-VMOVSLDUPZrrkz 3282
-VMOVSLDUPrm 3283
-VMOVSLDUPrr 3284
-VMOVSS 3285
-VMOVSSZmr 3286
-VMOVSSZmrk 3287
-VMOVSSZrm 3288
-VMOVSSZrm_alt 3289
-VMOVSSZrmk 3290
-VMOVSSZrmkz 3291
-VMOVSSZrr 3292
-VMOVSSZrr_REV 3293
-VMOVSSZrrk 3294
-VMOVSSZrrk_REV 3295
-VMOVSSZrrkz 3296
-VMOVSSZrrkz_REV 3297
-VMOVSSmr 3298
-VMOVSSrm 3299
-VMOVSSrm_alt 3300
-VMOVSSrr 3301
-VMOVSSrr_REV 3302
-VMOVUPDYmr 3303
-VMOVUPDYrm 3304
-VMOVUPDYrr 3305
-VMOVUPDYrr_REV 3306
-VMOVUPDZ 3307
-VMOVUPDZmr 3308
-VMOVUPDZmrk 3309
-VMOVUPDZrm 3310
-VMOVUPDZrmk 3311
-VMOVUPDZrmkz 3312
-VMOVUPDZrr 3313
-VMOVUPDZrr_REV 3314
-VMOVUPDZrrk 3315
-VMOVUPDZrrk_REV 3316
-VMOVUPDZrrkz 3317
-VMOVUPDZrrkz_REV 3318
-VMOVUPDmr 3319
-VMOVUPDrm 3320
-VMOVUPDrr 3321
-VMOVUPDrr_REV 3322
-VMOVUPSYmr 3323
-VMOVUPSYrm 3324
-VMOVUPSYrr 3325
-VMOVUPSYrr_REV 3326
-VMOVUPSZ 3327
-VMOVUPSZmr 3328
-VMOVUPSZmrk 3329
-VMOVUPSZrm 3330
-VMOVUPSZrmk 3331
-VMOVUPSZrmkz 3332
-VMOVUPSZrr 3333
-VMOVUPSZrr_REV 3334
-VMOVUPSZrrk 3335
-VMOVUPSZrrk_REV 3336
-VMOVUPSZrrkz 3337
-VMOVUPSZrrkz_REV 3338
-VMOVUPSmr 3339
-VMOVUPSrm 3340
-VMOVUPSrr 3341
-VMOVUPSrr_REV 3342
-VMOVW 3343
-VMOVWmr 3344
-VMOVWrm 3345
-VMOVZPDILo 3346
-VMOVZPQILo 3347
-VMOVZPWILo 3348
-VMPSADBWYrmi 3349
-VMPSADBWYrri 3350
-VMPSADBWZ 3351
-VMPSADBWZrmi 3352
-VMPSADBWZrmik 3353
-VMPSADBWZrmikz 3354
-VMPSADBWZrri 3355
-VMPSADBWZrrik 3356
-VMPSADBWZrrikz 3357
-VMPSADBWrmi 3358
-VMPSADBWrri 3359
-VMPTRLDm 3360
-VMPTRSTm 3361
-VMREAD 3362
-VMRESUME 3363
-VMRUN 3364
-VMSAVE 3365
-VMULBF 3366
-VMULPDYrm 3367
-VMULPDYrr 3368
-VMULPDZ 3369
-VMULPDZrm 3370
-VMULPDZrmb 3371
-VMULPDZrmbk 3372
-VMULPDZrmbkz 3373
-VMULPDZrmk 3374
-VMULPDZrmkz 3375
-VMULPDZrr 3376
-VMULPDZrrb 3377
-VMULPDZrrbk 3378
-VMULPDZrrbkz 3379
-VMULPDZrrk 3380
-VMULPDZrrkz 3381
-VMULPDrm 3382
-VMULPDrr 3383
-VMULPHZ 3384
-VMULPHZrm 3385
-VMULPHZrmb 3386
-VMULPHZrmbk 3387
-VMULPHZrmbkz 3388
-VMULPHZrmk 3389
-VMULPHZrmkz 3390
-VMULPHZrr 3391
-VMULPHZrrb 3392
-VMULPHZrrbk 3393
-VMULPHZrrbkz 3394
-VMULPHZrrk 3395
-VMULPHZrrkz 3396
-VMULPSYrm 3397
-VMULPSYrr 3398
-VMULPSZ 3399
-VMULPSZrm 3400
-VMULPSZrmb 3401
-VMULPSZrmbk 3402
-VMULPSZrmbkz 3403
-VMULPSZrmk 3404
-VMULPSZrmkz 3405
-VMULPSZrr 3406
-VMULPSZrrb 3407
-VMULPSZrrbk 3408
-VMULPSZrrbkz 3409
-VMULPSZrrk 3410
-VMULPSZrrkz 3411
-VMULPSrm 3412
-VMULPSrr 3413
-VMULSDZrm 3414
-VMULSDZrm_Int 3415
-VMULSDZrmk_Int 3416
-VMULSDZrmkz_Int 3417
-VMULSDZrr 3418
-VMULSDZrr_Int 3419
-VMULSDZrrb_Int 3420
-VMULSDZrrbk_Int 3421
-VMULSDZrrbkz_Int 3422
-VMULSDZrrk_Int 3423
-VMULSDZrrkz_Int 3424
-VMULSDrm 3425
-VMULSDrm_Int 3426
-VMULSDrr 3427
-VMULSDrr_Int 3428
-VMULSHZrm 3429
-VMULSHZrm_Int 3430
-VMULSHZrmk_Int 3431
-VMULSHZrmkz_Int 3432
-VMULSHZrr 3433
-VMULSHZrr_Int 3434
-VMULSHZrrb_Int 3435
-VMULSHZrrbk_Int 3436
-VMULSHZrrbkz_Int 3437
-VMULSHZrrk_Int 3438
-VMULSHZrrkz_Int 3439
-VMULSSZrm 3440
-VMULSSZrm_Int 3441
-VMULSSZrmk_Int 3442
-VMULSSZrmkz_Int 3443
-VMULSSZrr 3444
-VMULSSZrr_Int 3445
-VMULSSZrrb_Int 3446
-VMULSSZrrbk_Int 3447
-VMULSSZrrbkz_Int 3448
-VMULSSZrrk_Int 3449
-VMULSSZrrkz_Int 3450
-VMULSSrm 3451
-VMULSSrm_Int 3452
-VMULSSrr 3453
-VMULSSrr_Int 3454
-VMWRITE 3455
-VMXOFF 3456
-VMXON 3457
-VORPDYrm 3458
-VORPDYrr 3459
-VORPDZ 3460
-VORPDZrm 3461
-VORPDZrmb 3462
-VORPDZrmbk 3463
-VORPDZrmbkz 3464
-VORPDZrmk 3465
-VORPDZrmkz 3466
-VORPDZrr 3467
-VORPDZrrk 3468
-VORPDZrrkz 3469
-VORPDrm 3470
-VORPDrr 3471
-VORPSYrm 3472
-VORPSYrr 3473
-VORPSZ 3474
-VORPSZrm 3475
-VORPSZrmb 3476
-VORPSZrmbk 3477
-VORPSZrmbkz 3478
-VORPSZrmk 3479
-VORPSZrmkz 3480
-VORPSZrr 3481
-VORPSZrrk 3482
-VORPSZrrkz 3483
-VORPSrm 3484
-VORPSrr 3485
-VP 3486
-VPABSBYrm 3487
-VPABSBYrr 3488
-VPABSBZ 3489
-VPABSBZrm 3490
-VPABSBZrmk 3491
-VPABSBZrmkz 3492
-VPABSBZrr 3493
-VPABSBZrrk 3494
-VPABSBZrrkz 3495
-VPABSBrm 3496
-VPABSBrr 3497
-VPABSDYrm 3498
-VPABSDYrr 3499
-VPABSDZ 3500
-VPABSDZrm 3501
-VPABSDZrmb 3502
-VPABSDZrmbk 3503
-VPABSDZrmbkz 3504
-VPABSDZrmk 3505
-VPABSDZrmkz 3506
-VPABSDZrr 3507
-VPABSDZrrk 3508
-VPABSDZrrkz 3509
-VPABSDrm 3510
-VPABSDrr 3511
-VPABSQZ 3512
-VPABSQZrm 3513
-VPABSQZrmb 3514
-VPABSQZrmbk 3515
-VPABSQZrmbkz 3516
-VPABSQZrmk 3517
-VPABSQZrmkz 3518
-VPABSQZrr 3519
-VPABSQZrrk 3520
-VPABSQZrrkz 3521
-VPABSWYrm 3522
-VPABSWYrr 3523
-VPABSWZ 3524
-VPABSWZrm 3525
-VPABSWZrmk 3526
-VPABSWZrmkz 3527
-VPABSWZrr 3528
-VPABSWZrrk 3529
-VPABSWZrrkz 3530
-VPABSWrm 3531
-VPABSWrr 3532
-VPACKSSDWYrm 3533
-VPACKSSDWYrr 3534
-VPACKSSDWZ 3535
-VPACKSSDWZrm 3536
-VPACKSSDWZrmb 3537
-VPACKSSDWZrmbk 3538
-VPACKSSDWZrmbkz 3539
-VPACKSSDWZrmk 3540
-VPACKSSDWZrmkz 3541
-VPACKSSDWZrr 3542
-VPACKSSDWZrrk 3543
-VPACKSSDWZrrkz 3544
-VPACKSSDWrm 3545
-VPACKSSDWrr 3546
-VPACKSSWBYrm 3547
-VPACKSSWBYrr 3548
-VPACKSSWBZ 3549
-VPACKSSWBZrm 3550
-VPACKSSWBZrmk 3551
-VPACKSSWBZrmkz 3552
-VPACKSSWBZrr 3553
-VPACKSSWBZrrk 3554
-VPACKSSWBZrrkz 3555
-VPACKSSWBrm 3556
-VPACKSSWBrr 3557
-VPACKUSDWYrm 3558
-VPACKUSDWYrr 3559
-VPACKUSDWZ 3560
-VPACKUSDWZrm 3561
-VPACKUSDWZrmb 3562
-VPACKUSDWZrmbk 3563
-VPACKUSDWZrmbkz 3564
-VPACKUSDWZrmk 3565
-VPACKUSDWZrmkz 3566
-VPACKUSDWZrr 3567
-VPACKUSDWZrrk 3568
-VPACKUSDWZrrkz 3569
-VPACKUSDWrm 3570
-VPACKUSDWrr 3571
-VPACKUSWBYrm 3572
-VPACKUSWBYrr 3573
-VPACKUSWBZ 3574
-VPACKUSWBZrm 3575
-VPACKUSWBZrmk 3576
-VPACKUSWBZrmkz 3577
-VPACKUSWBZrr 3578
-VPACKUSWBZrrk 3579
-VPACKUSWBZrrkz 3580
-VPACKUSWBrm 3581
-VPACKUSWBrr 3582
-VPADDBYrm 3583
-VPADDBYrr 3584
-VPADDBZ 3585
-VPADDBZrm 3586
-VPADDBZrmk 3587
-VPADDBZrmkz 3588
-VPADDBZrr 3589
-VPADDBZrrk 3590
-VPADDBZrrkz 3591
-VPADDBrm 3592
-VPADDBrr 3593
-VPADDDYrm 3594
-VPADDDYrr 3595
-VPADDDZ 3596
-VPADDDZrm 3597
-VPADDDZrmb 3598
-VPADDDZrmbk 3599
-VPADDDZrmbkz 3600
-VPADDDZrmk 3601
-VPADDDZrmkz 3602
-VPADDDZrr 3603
-VPADDDZrrk 3604
-VPADDDZrrkz 3605
-VPADDDrm 3606
-VPADDDrr 3607
-VPADDQYrm 3608
-VPADDQYrr 3609
-VPADDQZ 3610
-VPADDQZrm 3611
-VPADDQZrmb 3612
-VPADDQZrmbk 3613
-VPADDQZrmbkz 3614
-VPADDQZrmk 3615
-VPADDQZrmkz 3616
-VPADDQZrr 3617
-VPADDQZrrk 3618
-VPADDQZrrkz 3619
-VPADDQrm 3620
-VPADDQrr 3621
-VPADDSBYrm 3622
-VPADDSBYrr 3623
-VPADDSBZ 3624
-VPADDSBZrm 3625
-VPADDSBZrmk 3626
-VPADDSBZrmkz 3627
-VPADDSBZrr 3628
-VPADDSBZrrk 3629
-VPADDSBZrrkz 3630
-VPADDSBrm 3631
-VPADDSBrr 3632
-VPADDSWYrm 3633
-VPADDSWYrr 3634
-VPADDSWZ 3635
-VPADDSWZrm 3636
-VPADDSWZrmk 3637
-VPADDSWZrmkz 3638
-VPADDSWZrr 3639
-VPADDSWZrrk 3640
-VPADDSWZrrkz 3641
-VPADDSWrm 3642
-VPADDSWrr 3643
-VPADDUSBYrm 3644
-VPADDUSBYrr 3645
-VPADDUSBZ 3646
-VPADDUSBZrm 3647
-VPADDUSBZrmk 3648
-VPADDUSBZrmkz 3649
-VPADDUSBZrr 3650
-VPADDUSBZrrk 3651
-VPADDUSBZrrkz 3652
-VPADDUSBrm 3653
-VPADDUSBrr 3654
-VPADDUSWYrm 3655
-VPADDUSWYrr 3656
-VPADDUSWZ 3657
-VPADDUSWZrm 3658
-VPADDUSWZrmk 3659
-VPADDUSWZrmkz 3660
-VPADDUSWZrr 3661
-VPADDUSWZrrk 3662
-VPADDUSWZrrkz 3663
-VPADDUSWrm 3664
-VPADDUSWrr 3665
-VPADDWYrm 3666
-VPADDWYrr 3667
-VPADDWZ 3668
-VPADDWZrm 3669
-VPADDWZrmk 3670
-VPADDWZrmkz 3671
-VPADDWZrr 3672
-VPADDWZrrk 3673
-VPADDWZrrkz 3674
-VPADDWrm 3675
-VPADDWrr 3676
-VPALIGNRYrmi 3677
-VPALIGNRYrri 3678
-VPALIGNRZ 3679
-VPALIGNRZrmi 3680
-VPALIGNRZrmik 3681
-VPALIGNRZrmikz 3682
-VPALIGNRZrri 3683
-VPALIGNRZrrik 3684
-VPALIGNRZrrikz 3685
-VPALIGNRrmi 3686
-VPALIGNRrri 3687
-VPANDDZ 3688
-VPANDDZrm 3689
-VPANDDZrmb 3690
-VPANDDZrmbk 3691
-VPANDDZrmbkz 3692
-VPANDDZrmk 3693
-VPANDDZrmkz 3694
-VPANDDZrr 3695
-VPANDDZrrk 3696
-VPANDDZrrkz 3697
-VPANDNDZ 3698
-VPANDNDZrm 3699
-VPANDNDZrmb 3700
-VPANDNDZrmbk 3701
-VPANDNDZrmbkz 3702
-VPANDNDZrmk 3703
-VPANDNDZrmkz 3704
-VPANDNDZrr 3705
-VPANDNDZrrk 3706
-VPANDNDZrrkz 3707
-VPANDNQZ 3708
-VPANDNQZrm 3709
-VPANDNQZrmb 3710
-VPANDNQZrmbk 3711
-VPANDNQZrmbkz 3712
-VPANDNQZrmk 3713
-VPANDNQZrmkz 3714
-VPANDNQZrr 3715
-VPANDNQZrrk 3716
-VPANDNQZrrkz 3717
-VPANDNYrm 3718
-VPANDNYrr 3719
-VPANDNrm 3720
-VPANDNrr 3721
-VPANDQZ 3722
-VPANDQZrm 3723
-VPANDQZrmb 3724
-VPANDQZrmbk 3725
-VPANDQZrmbkz 3726
-VPANDQZrmk 3727
-VPANDQZrmkz 3728
-VPANDQZrr 3729
-VPANDQZrrk 3730
-VPANDQZrrkz 3731
-VPANDYrm 3732
-VPANDYrr 3733
-VPANDrm 3734
-VPANDrr 3735
-VPAVGBYrm 3736
-VPAVGBYrr 3737
-VPAVGBZ 3738
-VPAVGBZrm 3739
-VPAVGBZrmk 3740
-VPAVGBZrmkz 3741
-VPAVGBZrr 3742
-VPAVGBZrrk 3743
-VPAVGBZrrkz 3744
-VPAVGBrm 3745
-VPAVGBrr 3746
-VPAVGWYrm 3747
-VPAVGWYrr 3748
-VPAVGWZ 3749
-VPAVGWZrm 3750
-VPAVGWZrmk 3751
-VPAVGWZrmkz 3752
-VPAVGWZrr 3753
-VPAVGWZrrk 3754
-VPAVGWZrrkz 3755
-VPAVGWrm 3756
-VPAVGWrr 3757
-VPBLENDDYrmi 3758
-VPBLENDDYrri 3759
-VPBLENDDrmi 3760
-VPBLENDDrri 3761
-VPBLENDMBZ 3762
-VPBLENDMBZrm 3763
-VPBLENDMBZrmk 3764
-VPBLENDMBZrmkz 3765
-VPBLENDMBZrr 3766
-VPBLENDMBZrrk 3767
-VPBLENDMBZrrkz 3768
-VPBLENDMDZ 3769
-VPBLENDMDZrm 3770
-VPBLENDMDZrmb 3771
-VPBLENDMDZrmbk 3772
-VPBLENDMDZrmbkz 3773
-VPBLENDMDZrmk 3774
-VPBLENDMDZrmkz 3775
-VPBLENDMDZrr 3776
-VPBLENDMDZrrk 3777
-VPBLENDMDZrrkz 3778
-VPBLENDMQZ 3779
-VPBLENDMQZrm 3780
-VPBLENDMQZrmb 3781
-VPBLENDMQZrmbk 3782
-VPBLENDMQZrmbkz 3783
-VPBLENDMQZrmk 3784
-VPBLENDMQZrmkz 3785
-VPBLENDMQZrr 3786
-VPBLENDMQZrrk 3787
-VPBLENDMQZrrkz 3788
-VPBLENDMWZ 3789
-VPBLENDMWZrm 3790
-VPBLENDMWZrmk 3791
-VPBLENDMWZrmkz 3792
-VPBLENDMWZrr 3793
-VPBLENDMWZrrk 3794
-VPBLENDMWZrrkz 3795
-VPBLENDVBYrmr 3796
-VPBLENDVBYrrr 3797
-VPBLENDVBrmr 3798
-VPBLENDVBrrr 3799
-VPBLENDWYrmi 3800
-VPBLENDWYrri 3801
-VPBLENDWrmi 3802
-VPBLENDWrri 3803
-VPBROADCASTBYrm 3804
-VPBROADCASTBYrr 3805
-VPBROADCASTBZ 3806
-VPBROADCASTBZrm 3807
-VPBROADCASTBZrmk 3808
-VPBROADCASTBZrmkz 3809
-VPBROADCASTBZrr 3810
-VPBROADCASTBZrrk 3811
-VPBROADCASTBZrrkz 3812
-VPBROADCASTBrZ 3813
-VPBROADCASTBrZrr 3814
-VPBROADCASTBrZrrk 3815
-VPBROADCASTBrZrrkz 3816
-VPBROADCASTBrm 3817
-VPBROADCASTBrr 3818
-VPBROADCASTDYrm 3819
-VPBROADCASTDYrr 3820
-VPBROADCASTDZ 3821
-VPBROADCASTDZrm 3822
-VPBROADCASTDZrmk 3823
-VPBROADCASTDZrmkz 3824
-VPBROADCASTDZrr 3825
-VPBROADCASTDZrrk 3826
-VPBROADCASTDZrrkz 3827
-VPBROADCASTDrZ 3828
-VPBROADCASTDrZrr 3829
-VPBROADCASTDrZrrk 3830
-VPBROADCASTDrZrrkz 3831
-VPBROADCASTDrm 3832
-VPBROADCASTDrr 3833
-VPBROADCASTMB 3834
-VPBROADCASTMW 3835
-VPBROADCASTQYrm 3836
-VPBROADCASTQYrr 3837
-VPBROADCASTQZ 3838
-VPBROADCASTQZrm 3839
-VPBROADCASTQZrmk 3840
-VPBROADCASTQZrmkz 3841
-VPBROADCASTQZrr 3842
-VPBROADCASTQZrrk 3843
-VPBROADCASTQZrrkz 3844
-VPBROADCASTQrZ 3845
-VPBROADCASTQrZrr 3846
-VPBROADCASTQrZrrk 3847
-VPBROADCASTQrZrrkz 3848
-VPBROADCASTQrm 3849
-VPBROADCASTQrr 3850
-VPBROADCASTWYrm 3851
-VPBROADCASTWYrr 3852
-VPBROADCASTWZ 3853
-VPBROADCASTWZrm 3854
-VPBROADCASTWZrmk 3855
-VPBROADCASTWZrmkz 3856
-VPBROADCASTWZrr 3857
-VPBROADCASTWZrrk 3858
-VPBROADCASTWZrrkz 3859
-VPBROADCASTWrZ 3860
-VPBROADCASTWrZrr 3861
-VPBROADCASTWrZrrk 3862
-VPBROADCASTWrZrrkz 3863
-VPBROADCASTWrm 3864
-VPBROADCASTWrr 3865
-VPCLMULQDQYrmi 3866
-VPCLMULQDQYrri 3867
-VPCLMULQDQZ 3868
-VPCLMULQDQZrmi 3869
-VPCLMULQDQZrri 3870
-VPCLMULQDQrmi 3871
-VPCLMULQDQrri 3872
-VPCMOVYrmr 3873
-VPCMOVYrrm 3874
-VPCMOVYrrr 3875
-VPCMOVYrrr_REV 3876
-VPCMOVrmr 3877
-VPCMOVrrm 3878
-VPCMOVrrr 3879
-VPCMOVrrr_REV 3880
-VPCMPBZ 3881
-VPCMPBZrmi 3882
-VPCMPBZrmik 3883
-VPCMPBZrri 3884
-VPCMPBZrrik 3885
-VPCMPDZ 3886
-VPCMPDZrmbi 3887
-VPCMPDZrmbik 3888
-VPCMPDZrmi 3889
-VPCMPDZrmik 3890
-VPCMPDZrri 3891
-VPCMPDZrrik 3892
-VPCMPEQBYrm 3893
-VPCMPEQBYrr 3894
-VPCMPEQBZ 3895
-VPCMPEQBZrm 3896
-VPCMPEQBZrmk 3897
-VPCMPEQBZrr 3898
-VPCMPEQBZrrk 3899
-VPCMPEQBrm 3900
-VPCMPEQBrr 3901
-VPCMPEQDYrm 3902
-VPCMPEQDYrr 3903
-VPCMPEQDZ 3904
-VPCMPEQDZrm 3905
-VPCMPEQDZrmb 3906
-VPCMPEQDZrmbk 3907
-VPCMPEQDZrmk 3908
-VPCMPEQDZrr 3909
-VPCMPEQDZrrk 3910
-VPCMPEQDrm 3911
-VPCMPEQDrr 3912
-VPCMPEQQYrm 3913
-VPCMPEQQYrr 3914
-VPCMPEQQZ 3915
-VPCMPEQQZrm 3916
-VPCMPEQQZrmb 3917
-VPCMPEQQZrmbk 3918
-VPCMPEQQZrmk 3919
-VPCMPEQQZrr 3920
-VPCMPEQQZrrk 3921
-VPCMPEQQrm 3922
-VPCMPEQQrr 3923
-VPCMPEQWYrm 3924
-VPCMPEQWYrr 3925
-VPCMPEQWZ 3926
-VPCMPEQWZrm 3927
-VPCMPEQWZrmk 3928
-VPCMPEQWZrr 3929
-VPCMPEQWZrrk 3930
-VPCMPEQWrm 3931
-VPCMPEQWrr 3932
-VPCMPESTRIrmi 3933
-VPCMPESTRIrri 3934
-VPCMPESTRMrmi 3935
-VPCMPESTRMrri 3936
-VPCMPGTBYrm 3937
-VPCMPGTBYrr 3938
-VPCMPGTBZ 3939
-VPCMPGTBZrm 3940
-VPCMPGTBZrmk 3941
-VPCMPGTBZrr 3942
-VPCMPGTBZrrk 3943
-VPCMPGTBrm 3944
-VPCMPGTBrr 3945
-VPCMPGTDYrm 3946
-VPCMPGTDYrr 3947
-VPCMPGTDZ 3948
-VPCMPGTDZrm 3949
-VPCMPGTDZrmb 3950
-VPCMPGTDZrmbk 3951
-VPCMPGTDZrmk 3952
-VPCMPGTDZrr 3953
-VPCMPGTDZrrk 3954
-VPCMPGTDrm 3955
-VPCMPGTDrr 3956
-VPCMPGTQYrm 3957
-VPCMPGTQYrr 3958
-VPCMPGTQZ 3959
-VPCMPGTQZrm 3960
-VPCMPGTQZrmb 3961
-VPCMPGTQZrmbk 3962
-VPCMPGTQZrmk 3963
-VPCMPGTQZrr 3964
-VPCMPGTQZrrk 3965
-VPCMPGTQrm 3966
-VPCMPGTQrr 3967
-VPCMPGTWYrm 3968
-VPCMPGTWYrr 3969
-VPCMPGTWZ 3970
-VPCMPGTWZrm 3971
-VPCMPGTWZrmk 3972
-VPCMPGTWZrr 3973
-VPCMPGTWZrrk 3974
-VPCMPGTWrm 3975
-VPCMPGTWrr 3976
-VPCMPISTRIrmi 3977
-VPCMPISTRIrri 3978
-VPCMPISTRMrmi 3979
-VPCMPISTRMrri 3980
-VPCMPQZ 3981
-VPCMPQZrmbi 3982
-VPCMPQZrmbik 3983
-VPCMPQZrmi 3984
-VPCMPQZrmik 3985
-VPCMPQZrri 3986
-VPCMPQZrrik 3987
-VPCMPUBZ 3988
-VPCMPUBZrmi 3989
-VPCMPUBZrmik 3990
-VPCMPUBZrri 3991
-VPCMPUBZrrik 3992
-VPCMPUDZ 3993
-VPCMPUDZrmbi 3994
-VPCMPUDZrmbik 3995
-VPCMPUDZrmi 3996
-VPCMPUDZrmik 3997
-VPCMPUDZrri 3998
-VPCMPUDZrrik 3999
-VPCMPUQZ 4000
-VPCMPUQZrmbi 4001
-VPCMPUQZrmbik 4002
-VPCMPUQZrmi 4003
-VPCMPUQZrmik 4004
-VPCMPUQZrri 4005
-VPCMPUQZrrik 4006
-VPCMPUWZ 4007
-VPCMPUWZrmi 4008
-VPCMPUWZrmik 4009
-VPCMPUWZrri 4010
-VPCMPUWZrrik 4011
-VPCMPWZ 4012
-VPCMPWZrmi 4013
-VPCMPWZrmik 4014
-VPCMPWZrri 4015
-VPCMPWZrrik 4016
-VPCOMBmi 4017
-VPCOMBri 4018
-VPCOMDmi 4019
-VPCOMDri 4020
-VPCOMPRESSBZ 4021
-VPCOMPRESSBZmr 4022
-VPCOMPRESSBZmrk 4023
-VPCOMPRESSBZrr 4024
-VPCOMPRESSBZrrk 4025
-VPCOMPRESSBZrrkz 4026
-VPCOMPRESSDZ 4027
-VPCOMPRESSDZmr 4028
-VPCOMPRESSDZmrk 4029
-VPCOMPRESSDZrr 4030
-VPCOMPRESSDZrrk 4031
-VPCOMPRESSDZrrkz 4032
-VPCOMPRESSQZ 4033
-VPCOMPRESSQZmr 4034
-VPCOMPRESSQZmrk 4035
-VPCOMPRESSQZrr 4036
-VPCOMPRESSQZrrk 4037
-VPCOMPRESSQZrrkz 4038
-VPCOMPRESSWZ 4039
-VPCOMPRESSWZmr 4040
-VPCOMPRESSWZmrk 4041
-VPCOMPRESSWZrr 4042
-VPCOMPRESSWZrrk 4043
-VPCOMPRESSWZrrkz 4044
-VPCOMQmi 4045
-VPCOMQri 4046
-VPCOMUBmi 4047
-VPCOMUBri 4048
-VPCOMUDmi 4049
-VPCOMUDri 4050
-VPCOMUQmi 4051
-VPCOMUQri 4052
-VPCOMUWmi 4053
-VPCOMUWri 4054
-VPCOMWmi 4055
-VPCOMWri 4056
-VPCONFLICTDZ 4057
-VPCONFLICTDZrm 4058
-VPCONFLICTDZrmb 4059
-VPCONFLICTDZrmbk 4060
-VPCONFLICTDZrmbkz 4061
-VPCONFLICTDZrmk 4062
-VPCONFLICTDZrmkz 4063
-VPCONFLICTDZrr 4064
-VPCONFLICTDZrrk 4065
-VPCONFLICTDZrrkz 4066
-VPCONFLICTQZ 4067
-VPCONFLICTQZrm 4068
-VPCONFLICTQZrmb 4069
-VPCONFLICTQZrmbk 4070
-VPCONFLICTQZrmbkz 4071
-VPCONFLICTQZrmk 4072
-VPCONFLICTQZrmkz 4073
-VPCONFLICTQZrr 4074
-VPCONFLICTQZrrk 4075
-VPCONFLICTQZrrkz 4076
-VPDPBSSDSYrm 4077
-VPDPBSSDSYrr 4078
-VPDPBSSDSZ 4079
-VPDPBSSDSZrm 4080
-VPDPBSSDSZrmb 4081
-VPDPBSSDSZrmbk 4082
-VPDPBSSDSZrmbkz 4083
-VPDPBSSDSZrmk 4084
-VPDPBSSDSZrmkz 4085
-VPDPBSSDSZrr 4086
-VPDPBSSDSZrrk 4087
-VPDPBSSDSZrrkz 4088
-VPDPBSSDSrm 4089
-VPDPBSSDSrr 4090
-VPDPBSSDYrm 4091
-VPDPBSSDYrr 4092
-VPDPBSSDZ 4093
-VPDPBSSDZrm 4094
-VPDPBSSDZrmb 4095
-VPDPBSSDZrmbk 4096
-VPDPBSSDZrmbkz 4097
-VPDPBSSDZrmk 4098
-VPDPBSSDZrmkz 4099
-VPDPBSSDZrr 4100
-VPDPBSSDZrrk 4101
-VPDPBSSDZrrkz 4102
-VPDPBSSDrm 4103
-VPDPBSSDrr 4104
-VPDPBSUDSYrm 4105
-VPDPBSUDSYrr 4106
-VPDPBSUDSZ 4107
-VPDPBSUDSZrm 4108
-VPDPBSUDSZrmb 4109
-VPDPBSUDSZrmbk 4110
-VPDPBSUDSZrmbkz 4111
-VPDPBSUDSZrmk 4112
-VPDPBSUDSZrmkz 4113
-VPDPBSUDSZrr 4114
-VPDPBSUDSZrrk 4115
-VPDPBSUDSZrrkz 4116
-VPDPBSUDSrm 4117
-VPDPBSUDSrr 4118
-VPDPBSUDYrm 4119
-VPDPBSUDYrr 4120
-VPDPBSUDZ 4121
-VPDPBSUDZrm 4122
-VPDPBSUDZrmb 4123
-VPDPBSUDZrmbk 4124
-VPDPBSUDZrmbkz 4125
-VPDPBSUDZrmk 4126
-VPDPBSUDZrmkz 4127
-VPDPBSUDZrr 4128
-VPDPBSUDZrrk 4129
-VPDPBSUDZrrkz 4130
-VPDPBSUDrm 4131
-VPDPBSUDrr 4132
-VPDPBUSDSYrm 4133
-VPDPBUSDSYrr 4134
-VPDPBUSDSZ 4135
-VPDPBUSDSZrm 4136
-VPDPBUSDSZrmb 4137
-VPDPBUSDSZrmbk 4138
-VPDPBUSDSZrmbkz 4139
-VPDPBUSDSZrmk 4140
-VPDPBUSDSZrmkz 4141
-VPDPBUSDSZrr 4142
-VPDPBUSDSZrrk 4143
-VPDPBUSDSZrrkz 4144
-VPDPBUSDSrm 4145
-VPDPBUSDSrr 4146
-VPDPBUSDYrm 4147
-VPDPBUSDYrr 4148
-VPDPBUSDZ 4149
-VPDPBUSDZrm 4150
-VPDPBUSDZrmb 4151
-VPDPBUSDZrmbk 4152
-VPDPBUSDZrmbkz 4153
-VPDPBUSDZrmk 4154
-VPDPBUSDZrmkz 4155
-VPDPBUSDZrr 4156
-VPDPBUSDZrrk 4157
-VPDPBUSDZrrkz 4158
-VPDPBUSDrm 4159
-VPDPBUSDrr 4160
-VPDPBUUDSYrm 4161
-VPDPBUUDSYrr 4162
-VPDPBUUDSZ 4163
-VPDPBUUDSZrm 4164
-VPDPBUUDSZrmb 4165
-VPDPBUUDSZrmbk 4166
-VPDPBUUDSZrmbkz 4167
-VPDPBUUDSZrmk 4168
-VPDPBUUDSZrmkz 4169
-VPDPBUUDSZrr 4170
-VPDPBUUDSZrrk 4171
-VPDPBUUDSZrrkz 4172
-VPDPBUUDSrm 4173
-VPDPBUUDSrr 4174
-VPDPBUUDYrm 4175
-VPDPBUUDYrr 4176
-VPDPBUUDZ 4177
-VPDPBUUDZrm 4178
-VPDPBUUDZrmb 4179
-VPDPBUUDZrmbk 4180
-VPDPBUUDZrmbkz 4181
-VPDPBUUDZrmk 4182
-VPDPBUUDZrmkz 4183
-VPDPBUUDZrr 4184
-VPDPBUUDZrrk 4185
-VPDPBUUDZrrkz 4186
-VPDPBUUDrm 4187
-VPDPBUUDrr 4188
-VPDPWSSDSYrm 4189
-VPDPWSSDSYrr 4190
-VPDPWSSDSZ 4191
-VPDPWSSDSZrm 4192
-VPDPWSSDSZrmb 4193
-VPDPWSSDSZrmbk 4194
-VPDPWSSDSZrmbkz 4195
-VPDPWSSDSZrmk 4196
-VPDPWSSDSZrmkz 4197
-VPDPWSSDSZrr 4198
-VPDPWSSDSZrrk 4199
-VPDPWSSDSZrrkz 4200
-VPDPWSSDSrm 4201
-VPDPWSSDSrr 4202
-VPDPWSSDYrm 4203
-VPDPWSSDYrr 4204
-VPDPWSSDZ 4205
-VPDPWSSDZrm 4206
-VPDPWSSDZrmb 4207
-VPDPWSSDZrmbk 4208
-VPDPWSSDZrmbkz 4209
-VPDPWSSDZrmk 4210
-VPDPWSSDZrmkz 4211
-VPDPWSSDZrr 4212
-VPDPWSSDZrrk 4213
-VPDPWSSDZrrkz 4214
-VPDPWSSDrm 4215
-VPDPWSSDrr 4216
-VPDPWSUDSYrm 4217
-VPDPWSUDSYrr 4218
-VPDPWSUDSZ 4219
-VPDPWSUDSZrm 4220
-VPDPWSUDSZrmb 4221
-VPDPWSUDSZrmbk 4222
-VPDPWSUDSZrmbkz 4223
-VPDPWSUDSZrmk 4224
-VPDPWSUDSZrmkz 4225
-VPDPWSUDSZrr 4226
-VPDPWSUDSZrrk 4227
-VPDPWSUDSZrrkz 4228
-VPDPWSUDSrm 4229
-VPDPWSUDSrr 4230
-VPDPWSUDYrm 4231
-VPDPWSUDYrr 4232
-VPDPWSUDZ 4233
-VPDPWSUDZrm 4234
-VPDPWSUDZrmb 4235
-VPDPWSUDZrmbk 4236
-VPDPWSUDZrmbkz 4237
-VPDPWSUDZrmk 4238
-VPDPWSUDZrmkz 4239
-VPDPWSUDZrr 4240
-VPDPWSUDZrrk 4241
-VPDPWSUDZrrkz 4242
-VPDPWSUDrm 4243
-VPDPWSUDrr 4244
-VPDPWUSDSYrm 4245
-VPDPWUSDSYrr 4246
-VPDPWUSDSZ 4247
-VPDPWUSDSZrm 4248
-VPDPWUSDSZrmb 4249
-VPDPWUSDSZrmbk 4250
-VPDPWUSDSZrmbkz 4251
-VPDPWUSDSZrmk 4252
-VPDPWUSDSZrmkz 4253
-VPDPWUSDSZrr 4254
-VPDPWUSDSZrrk 4255
-VPDPWUSDSZrrkz 4256
-VPDPWUSDSrm 4257
-VPDPWUSDSrr 4258
-VPDPWUSDYrm 4259
-VPDPWUSDYrr 4260
-VPDPWUSDZ 4261
-VPDPWUSDZrm 4262
-VPDPWUSDZrmb 4263
-VPDPWUSDZrmbk 4264
-VPDPWUSDZrmbkz 4265
-VPDPWUSDZrmk 4266
-VPDPWUSDZrmkz 4267
-VPDPWUSDZrr 4268
-VPDPWUSDZrrk 4269
-VPDPWUSDZrrkz 4270
-VPDPWUSDrm 4271
-VPDPWUSDrr 4272
-VPDPWUUDSYrm 4273
-VPDPWUUDSYrr 4274
-VPDPWUUDSZ 4275
-VPDPWUUDSZrm 4276
-VPDPWUUDSZrmb 4277
-VPDPWUUDSZrmbk 4278
-VPDPWUUDSZrmbkz 4279
-VPDPWUUDSZrmk 4280
-VPDPWUUDSZrmkz 4281
-VPDPWUUDSZrr 4282
-VPDPWUUDSZrrk 4283
-VPDPWUUDSZrrkz 4284
-VPDPWUUDSrm 4285
-VPDPWUUDSrr 4286
-VPDPWUUDYrm 4287
-VPDPWUUDYrr 4288
-VPDPWUUDZ 4289
-VPDPWUUDZrm 4290
-VPDPWUUDZrmb 4291
-VPDPWUUDZrmbk 4292
-VPDPWUUDZrmbkz 4293
-VPDPWUUDZrmk 4294
-VPDPWUUDZrmkz 4295
-VPDPWUUDZrr 4296
-VPDPWUUDZrrk 4297
-VPDPWUUDZrrkz 4298
-VPDPWUUDrm 4299
-VPDPWUUDrr 4300
-VPERM 4301
-VPERMBZ 4302
-VPERMBZrm 4303
-VPERMBZrmk 4304
-VPERMBZrmkz 4305
-VPERMBZrr 4306
-VPERMBZrrk 4307
-VPERMBZrrkz 4308
-VPERMDYrm 4309
-VPERMDYrr 4310
-VPERMDZ 4311
-VPERMDZrm 4312
-VPERMDZrmb 4313
-VPERMDZrmbk 4314
-VPERMDZrmbkz 4315
-VPERMDZrmk 4316
-VPERMDZrmkz 4317
-VPERMDZrr 4318
-VPERMDZrrk 4319
-VPERMDZrrkz 4320
-VPERMI 4321
-VPERMIL 4322
-VPERMILPDYmi 4323
-VPERMILPDYri 4324
-VPERMILPDYrm 4325
-VPERMILPDYrr 4326
-VPERMILPDZ 4327
-VPERMILPDZmbi 4328
-VPERMILPDZmbik 4329
-VPERMILPDZmbikz 4330
-VPERMILPDZmi 4331
-VPERMILPDZmik 4332
-VPERMILPDZmikz 4333
-VPERMILPDZri 4334
-VPERMILPDZrik 4335
-VPERMILPDZrikz 4336
-VPERMILPDZrm 4337
-VPERMILPDZrmb 4338
-VPERMILPDZrmbk 4339
-VPERMILPDZrmbkz 4340
-VPERMILPDZrmk 4341
-VPERMILPDZrmkz 4342
-VPERMILPDZrr 4343
-VPERMILPDZrrk 4344
-VPERMILPDZrrkz 4345
-VPERMILPDmi 4346
-VPERMILPDri 4347
-VPERMILPDrm 4348
-VPERMILPDrr 4349
-VPERMILPSYmi 4350
-VPERMILPSYri 4351
-VPERMILPSYrm 4352
-VPERMILPSYrr 4353
-VPERMILPSZ 4354
-VPERMILPSZmbi 4355
-VPERMILPSZmbik 4356
-VPERMILPSZmbikz 4357
-VPERMILPSZmi 4358
-VPERMILPSZmik 4359
-VPERMILPSZmikz 4360
-VPERMILPSZri 4361
-VPERMILPSZrik 4362
-VPERMILPSZrikz 4363
-VPERMILPSZrm 4364
-VPERMILPSZrmb 4365
-VPERMILPSZrmbk 4366
-VPERMILPSZrmbkz 4367
-VPERMILPSZrmk 4368
-VPERMILPSZrmkz 4369
-VPERMILPSZrr 4370
-VPERMILPSZrrk 4371
-VPERMILPSZrrkz 4372
-VPERMILPSmi 4373
-VPERMILPSri 4374
-VPERMILPSrm 4375
-VPERMILPSrr 4376
-VPERMPDYmi 4377
-VPERMPDYri 4378
-VPERMPDZ 4379
-VPERMPDZmbi 4380
-VPERMPDZmbik 4381
-VPERMPDZmbikz 4382
-VPERMPDZmi 4383
-VPERMPDZmik 4384
-VPERMPDZmikz 4385
-VPERMPDZri 4386
-VPERMPDZrik 4387
-VPERMPDZrikz 4388
-VPERMPDZrm 4389
-VPERMPDZrmb 4390
-VPERMPDZrmbk 4391
-VPERMPDZrmbkz 4392
-VPERMPDZrmk 4393
-VPERMPDZrmkz 4394
-VPERMPDZrr 4395
-VPERMPDZrrk 4396
-VPERMPDZrrkz 4397
-VPERMPSYrm 4398
-VPERMPSYrr 4399
-VPERMPSZ 4400
-VPERMPSZrm 4401
-VPERMPSZrmb 4402
-VPERMPSZrmbk 4403
-VPERMPSZrmbkz 4404
-VPERMPSZrmk 4405
-VPERMPSZrmkz 4406
-VPERMPSZrr 4407
-VPERMPSZrrk 4408
-VPERMPSZrrkz 4409
-VPERMQYmi 4410
-VPERMQYri 4411
-VPERMQZ 4412
-VPERMQZmbi 4413
-VPERMQZmbik 4414
-VPERMQZmbikz 4415
-VPERMQZmi 4416
-VPERMQZmik 4417
-VPERMQZmikz 4418
-VPERMQZri 4419
-VPERMQZrik 4420
-VPERMQZrikz 4421
-VPERMQZrm 4422
-VPERMQZrmb 4423
-VPERMQZrmbk 4424
-VPERMQZrmbkz 4425
-VPERMQZrmk 4426
-VPERMQZrmkz 4427
-VPERMQZrr 4428
-VPERMQZrrk 4429
-VPERMQZrrkz 4430
-VPERMT 4431
-VPERMWZ 4432
-VPERMWZrm 4433
-VPERMWZrmk 4434
-VPERMWZrmkz 4435
-VPERMWZrr 4436
-VPERMWZrrk 4437
-VPERMWZrrkz 4438
-VPEXPANDBZ 4439
-VPEXPANDBZrm 4440
-VPEXPANDBZrmk 4441
-VPEXPANDBZrmkz 4442
-VPEXPANDBZrr 4443
-VPEXPANDBZrrk 4444
-VPEXPANDBZrrkz 4445
-VPEXPANDDZ 4446
-VPEXPANDDZrm 4447
-VPEXPANDDZrmk 4448
-VPEXPANDDZrmkz 4449
-VPEXPANDDZrr 4450
-VPEXPANDDZrrk 4451
-VPEXPANDDZrrkz 4452
-VPEXPANDQZ 4453
-VPEXPANDQZrm 4454
-VPEXPANDQZrmk 4455
-VPEXPANDQZrmkz 4456
-VPEXPANDQZrr 4457
-VPEXPANDQZrrk 4458
-VPEXPANDQZrrkz 4459
-VPEXPANDWZ 4460
-VPEXPANDWZrm 4461
-VPEXPANDWZrmk 4462
-VPEXPANDWZrmkz 4463
-VPEXPANDWZrr 4464
-VPEXPANDWZrrk 4465
-VPEXPANDWZrrkz 4466
-VPEXTRBZmri 4467
-VPEXTRBZrri 4468
-VPEXTRBmri 4469
-VPEXTRBrri 4470
-VPEXTRDZmri 4471
-VPEXTRDZrri 4472
-VPEXTRDmri 4473
-VPEXTRDrri 4474
-VPEXTRQZmri 4475
-VPEXTRQZrri 4476
-VPEXTRQmri 4477
-VPEXTRQrri 4478
-VPEXTRWZmri 4479
-VPEXTRWZrri 4480
-VPEXTRWZrri_REV 4481
-VPEXTRWmri 4482
-VPEXTRWrri 4483
-VPEXTRWrri_REV 4484
-VPGATHERDDYrm 4485
-VPGATHERDDZ 4486
-VPGATHERDDZrm 4487
-VPGATHERDDrm 4488
-VPGATHERDQYrm 4489
-VPGATHERDQZ 4490
-VPGATHERDQZrm 4491
-VPGATHERDQrm 4492
-VPGATHERQDYrm 4493
-VPGATHERQDZ 4494
-VPGATHERQDZrm 4495
-VPGATHERQDrm 4496
-VPGATHERQQYrm 4497
-VPGATHERQQZ 4498
-VPGATHERQQZrm 4499
-VPGATHERQQrm 4500
-VPHADDBDrm 4501
-VPHADDBDrr 4502
-VPHADDBQrm 4503
-VPHADDBQrr 4504
-VPHADDBWrm 4505
-VPHADDBWrr 4506
-VPHADDDQrm 4507
-VPHADDDQrr 4508
-VPHADDDYrm 4509
-VPHADDDYrr 4510
-VPHADDDrm 4511
-VPHADDDrr 4512
-VPHADDSWYrm 4513
-VPHADDSWYrr 4514
-VPHADDSWrm 4515
-VPHADDSWrr 4516
-VPHADDUBDrm 4517
-VPHADDUBDrr 4518
-VPHADDUBQrm 4519
-VPHADDUBQrr 4520
-VPHADDUBWrm 4521
-VPHADDUBWrr 4522
-VPHADDUDQrm 4523
-VPHADDUDQrr 4524
-VPHADDUWDrm 4525
-VPHADDUWDrr 4526
-VPHADDUWQrm 4527
-VPHADDUWQrr 4528
-VPHADDWDrm 4529
-VPHADDWDrr 4530
-VPHADDWQrm 4531
-VPHADDWQrr 4532
-VPHADDWYrm 4533
-VPHADDWYrr 4534
-VPHADDWrm 4535
-VPHADDWrr 4536
-VPHMINPOSUWrm 4537
-VPHMINPOSUWrr 4538
-VPHSUBBWrm 4539
-VPHSUBBWrr 4540
-VPHSUBDQrm 4541
-VPHSUBDQrr 4542
-VPHSUBDYrm 4543
-VPHSUBDYrr 4544
-VPHSUBDrm 4545
-VPHSUBDrr 4546
-VPHSUBSWYrm 4547
-VPHSUBSWYrr 4548
-VPHSUBSWrm 4549
-VPHSUBSWrr 4550
-VPHSUBWDrm 4551
-VPHSUBWDrr 4552
-VPHSUBWYrm 4553
-VPHSUBWYrr 4554
-VPHSUBWrm 4555
-VPHSUBWrr 4556
-VPINSRBZrmi 4557
-VPINSRBZrri 4558
-VPINSRBrmi 4559
-VPINSRBrri 4560
-VPINSRDZrmi 4561
-VPINSRDZrri 4562
-VPINSRDrmi 4563
-VPINSRDrri 4564
-VPINSRQZrmi 4565
-VPINSRQZrri 4566
-VPINSRQrmi 4567
-VPINSRQrri 4568
-VPINSRWZrmi 4569
-VPINSRWZrri 4570
-VPINSRWrmi 4571
-VPINSRWrri 4572
-VPLZCNTDZ 4573
-VPLZCNTDZrm 4574
-VPLZCNTDZrmb 4575
-VPLZCNTDZrmbk 4576
-VPLZCNTDZrmbkz 4577
-VPLZCNTDZrmk 4578
-VPLZCNTDZrmkz 4579
-VPLZCNTDZrr 4580
-VPLZCNTDZrrk 4581
-VPLZCNTDZrrkz 4582
-VPLZCNTQZ 4583
-VPLZCNTQZrm 4584
-VPLZCNTQZrmb 4585
-VPLZCNTQZrmbk 4586
-VPLZCNTQZrmbkz 4587
-VPLZCNTQZrmk 4588
-VPLZCNTQZrmkz 4589
-VPLZCNTQZrr 4590
-VPLZCNTQZrrk 4591
-VPLZCNTQZrrkz 4592
-VPMACSDDrm 4593
-VPMACSDDrr 4594
-VPMACSDQHrm 4595
-VPMACSDQHrr 4596
-VPMACSDQLrm 4597
-VPMACSDQLrr 4598
-VPMACSSDDrm 4599
-VPMACSSDDrr 4600
-VPMACSSDQHrm 4601
-VPMACSSDQHrr 4602
-VPMACSSDQLrm 4603
-VPMACSSDQLrr 4604
-VPMACSSWDrm 4605
-VPMACSSWDrr 4606
-VPMACSSWWrm 4607
-VPMACSSWWrr 4608
-VPMACSWDrm 4609
-VPMACSWDrr 4610
-VPMACSWWrm 4611
-VPMACSWWrr 4612
-VPMADCSSWDrm 4613
-VPMADCSSWDrr 4614
-VPMADCSWDrm 4615
-VPMADCSWDrr 4616
-VPMADD 4617
-VPMADDUBSWYrm 4618
-VPMADDUBSWYrr 4619
-VPMADDUBSWZ 4620
-VPMADDUBSWZrm 4621
-VPMADDUBSWZrmk 4622
-VPMADDUBSWZrmkz 4623
-VPMADDUBSWZrr 4624
-VPMADDUBSWZrrk 4625
-VPMADDUBSWZrrkz 4626
-VPMADDUBSWrm 4627
-VPMADDUBSWrr 4628
-VPMADDWDYrm 4629
-VPMADDWDYrr 4630
-VPMADDWDZ 4631
-VPMADDWDZrm 4632
-VPMADDWDZrmk 4633
-VPMADDWDZrmkz 4634
-VPMADDWDZrr 4635
-VPMADDWDZrrk 4636
-VPMADDWDZrrkz 4637
-VPMADDWDrm 4638
-VPMADDWDrr 4639
-VPMASKMOVDYmr 4640
-VPMASKMOVDYrm 4641
-VPMASKMOVDmr 4642
-VPMASKMOVDrm 4643
-VPMASKMOVQYmr 4644
-VPMASKMOVQYrm 4645
-VPMASKMOVQmr 4646
-VPMASKMOVQrm 4647
-VPMAXSBYrm 4648
-VPMAXSBYrr 4649
-VPMAXSBZ 4650
-VPMAXSBZrm 4651
-VPMAXSBZrmk 4652
-VPMAXSBZrmkz 4653
-VPMAXSBZrr 4654
-VPMAXSBZrrk 4655
-VPMAXSBZrrkz 4656
-VPMAXSBrm 4657
-VPMAXSBrr 4658
-VPMAXSDYrm 4659
-VPMAXSDYrr 4660
-VPMAXSDZ 4661
-VPMAXSDZrm 4662
-VPMAXSDZrmb 4663
-VPMAXSDZrmbk 4664
-VPMAXSDZrmbkz 4665
-VPMAXSDZrmk 4666
-VPMAXSDZrmkz 4667
-VPMAXSDZrr 4668
-VPMAXSDZrrk 4669
-VPMAXSDZrrkz 4670
-VPMAXSDrm 4671
-VPMAXSDrr 4672
-VPMAXSQZ 4673
-VPMAXSQZrm 4674
-VPMAXSQZrmb 4675
-VPMAXSQZrmbk 4676
-VPMAXSQZrmbkz 4677
-VPMAXSQZrmk 4678
-VPMAXSQZrmkz 4679
-VPMAXSQZrr 4680
-VPMAXSQZrrk 4681
-VPMAXSQZrrkz 4682
-VPMAXSWYrm 4683
-VPMAXSWYrr 4684
-VPMAXSWZ 4685
-VPMAXSWZrm 4686
-VPMAXSWZrmk 4687
-VPMAXSWZrmkz 4688
-VPMAXSWZrr 4689
-VPMAXSWZrrk 4690
-VPMAXSWZrrkz 4691
-VPMAXSWrm 4692
-VPMAXSWrr 4693
-VPMAXUBYrm 4694
-VPMAXUBYrr 4695
-VPMAXUBZ 4696
-VPMAXUBZrm 4697
-VPMAXUBZrmk 4698
-VPMAXUBZrmkz 4699
-VPMAXUBZrr 4700
-VPMAXUBZrrk 4701
-VPMAXUBZrrkz 4702
-VPMAXUBrm 4703
-VPMAXUBrr 4704
-VPMAXUDYrm 4705
-VPMAXUDYrr 4706
-VPMAXUDZ 4707
-VPMAXUDZrm 4708
-VPMAXUDZrmb 4709
-VPMAXUDZrmbk 4710
-VPMAXUDZrmbkz 4711
-VPMAXUDZrmk 4712
-VPMAXUDZrmkz 4713
-VPMAXUDZrr 4714
-VPMAXUDZrrk 4715
-VPMAXUDZrrkz 4716
-VPMAXUDrm 4717
-VPMAXUDrr 4718
-VPMAXUQZ 4719
-VPMAXUQZrm 4720
-VPMAXUQZrmb 4721
-VPMAXUQZrmbk 4722
-VPMAXUQZrmbkz 4723
-VPMAXUQZrmk 4724
-VPMAXUQZrmkz 4725
-VPMAXUQZrr 4726
-VPMAXUQZrrk 4727
-VPMAXUQZrrkz 4728
-VPMAXUWYrm 4729
-VPMAXUWYrr 4730
-VPMAXUWZ 4731
-VPMAXUWZrm 4732
-VPMAXUWZrmk 4733
-VPMAXUWZrmkz 4734
-VPMAXUWZrr 4735
-VPMAXUWZrrk 4736
-VPMAXUWZrrkz 4737
-VPMAXUWrm 4738
-VPMAXUWrr 4739
-VPMINSBYrm 4740
-VPMINSBYrr 4741
-VPMINSBZ 4742
-VPMINSBZrm 4743
-VPMINSBZrmk 4744
-VPMINSBZrmkz 4745
-VPMINSBZrr 4746
-VPMINSBZrrk 4747
-VPMINSBZrrkz 4748
-VPMINSBrm 4749
-VPMINSBrr 4750
-VPMINSDYrm 4751
-VPMINSDYrr 4752
-VPMINSDZ 4753
-VPMINSDZrm 4754
-VPMINSDZrmb 4755
-VPMINSDZrmbk 4756
-VPMINSDZrmbkz 4757
-VPMINSDZrmk 4758
-VPMINSDZrmkz 4759
-VPMINSDZrr 4760
-VPMINSDZrrk 4761
-VPMINSDZrrkz 4762
-VPMINSDrm 4763
-VPMINSDrr 4764
-VPMINSQZ 4765
-VPMINSQZrm 4766
-VPMINSQZrmb 4767
-VPMINSQZrmbk 4768
-VPMINSQZrmbkz 4769
-VPMINSQZrmk 4770
-VPMINSQZrmkz 4771
-VPMINSQZrr 4772
-VPMINSQZrrk 4773
-VPMINSQZrrkz 4774
-VPMINSWYrm 4775
-VPMINSWYrr 4776
-VPMINSWZ 4777
-VPMINSWZrm 4778
-VPMINSWZrmk 4779
-VPMINSWZrmkz 4780
-VPMINSWZrr 4781
-VPMINSWZrrk 4782
-VPMINSWZrrkz 4783
-VPMINSWrm 4784
-VPMINSWrr 4785
-VPMINUBYrm 4786
-VPMINUBYrr 4787
-VPMINUBZ 4788
-VPMINUBZrm 4789
-VPMINUBZrmk 4790
-VPMINUBZrmkz 4791
-VPMINUBZrr 4792
-VPMINUBZrrk 4793
-VPMINUBZrrkz 4794
-VPMINUBrm 4795
-VPMINUBrr 4796
-VPMINUDYrm 4797
-VPMINUDYrr 4798
-VPMINUDZ 4799
-VPMINUDZrm 4800
-VPMINUDZrmb 4801
-VPMINUDZrmbk 4802
-VPMINUDZrmbkz 4803
-VPMINUDZrmk 4804
-VPMINUDZrmkz 4805
-VPMINUDZrr 4806
-VPMINUDZrrk 4807
-VPMINUDZrrkz 4808
-VPMINUDrm 4809
-VPMINUDrr 4810
-VPMINUQZ 4811
-VPMINUQZrm 4812
-VPMINUQZrmb 4813
-VPMINUQZrmbk 4814
-VPMINUQZrmbkz 4815
-VPMINUQZrmk 4816
-VPMINUQZrmkz 4817
-VPMINUQZrr 4818
-VPMINUQZrrk 4819
-VPMINUQZrrkz 4820
-VPMINUWYrm 4821
-VPMINUWYrr 4822
-VPMINUWZ 4823
-VPMINUWZrm 4824
-VPMINUWZrmk 4825
-VPMINUWZrmkz 4826
-VPMINUWZrr 4827
-VPMINUWZrrk 4828
-VPMINUWZrrkz 4829
-VPMINUWrm 4830
-VPMINUWrr 4831
-VPMOVB 4832
-VPMOVD 4833
-VPMOVDBZ 4834
-VPMOVDBZmr 4835
-VPMOVDBZmrk 4836
-VPMOVDBZrr 4837
-VPMOVDBZrrk 4838
-VPMOVDBZrrkz 4839
-VPMOVDWZ 4840
-VPMOVDWZmr 4841
-VPMOVDWZmrk 4842
-VPMOVDWZrr 4843
-VPMOVDWZrrk 4844
-VPMOVDWZrrkz 4845
-VPMOVM 4846
-VPMOVMSKBYrr 4847
-VPMOVMSKBrr 4848
-VPMOVQ 4849
-VPMOVQBZ 4850
-VPMOVQBZmr 4851
-VPMOVQBZmrk 4852
-VPMOVQBZrr 4853
-VPMOVQBZrrk 4854
-VPMOVQBZrrkz 4855
-VPMOVQDZ 4856
-VPMOVQDZmr 4857
-VPMOVQDZmrk 4858
-VPMOVQDZrr 4859
-VPMOVQDZrrk 4860
-VPMOVQDZrrkz 4861
-VPMOVQWZ 4862
-VPMOVQWZmr 4863
-VPMOVQWZmrk 4864
-VPMOVQWZrr 4865
-VPMOVQWZrrk 4866
-VPMOVQWZrrkz 4867
-VPMOVSDBZ 4868
-VPMOVSDBZmr 4869
-VPMOVSDBZmrk 4870
-VPMOVSDBZrr 4871
-VPMOVSDBZrrk 4872
-VPMOVSDBZrrkz 4873
-VPMOVSDWZ 4874
-VPMOVSDWZmr 4875
-VPMOVSDWZmrk 4876
-VPMOVSDWZrr 4877
-VPMOVSDWZrrk 4878
-VPMOVSDWZrrkz 4879
-VPMOVSQBZ 4880
-VPMOVSQBZmr 4881
-VPMOVSQBZmrk 4882
-VPMOVSQBZrr 4883
-VPMOVSQBZrrk 4884
-VPMOVSQBZrrkz 4885
-VPMOVSQDZ 4886
-VPMOVSQDZmr 4887
-VPMOVSQDZmrk 4888
-VPMOVSQDZrr 4889
-VPMOVSQDZrrk 4890
-VPMOVSQDZrrkz 4891
-VPMOVSQWZ 4892
-VPMOVSQWZmr 4893
-VPMOVSQWZmrk 4894
-VPMOVSQWZrr 4895
-VPMOVSQWZrrk 4896
-VPMOVSQWZrrkz 4897
-VPMOVSWBZ 4898
-VPMOVSWBZmr 4899
-VPMOVSWBZmrk 4900
-VPMOVSWBZrr 4901
-VPMOVSWBZrrk 4902
-VPMOVSWBZrrkz 4903
-VPMOVSXBDYrm 4904
-VPMOVSXBDYrr 4905
-VPMOVSXBDZ 4906
-VPMOVSXBDZrm 4907
-VPMOVSXBDZrmk 4908
-VPMOVSXBDZrmkz 4909
-VPMOVSXBDZrr 4910
-VPMOVSXBDZrrk 4911
-VPMOVSXBDZrrkz 4912
-VPMOVSXBDrm 4913
-VPMOVSXBDrr 4914
-VPMOVSXBQYrm 4915
-VPMOVSXBQYrr 4916
-VPMOVSXBQZ 4917
-VPMOVSXBQZrm 4918
-VPMOVSXBQZrmk 4919
-VPMOVSXBQZrmkz 4920
-VPMOVSXBQZrr 4921
-VPMOVSXBQZrrk 4922
-VPMOVSXBQZrrkz 4923
-VPMOVSXBQrm 4924
-VPMOVSXBQrr 4925
-VPMOVSXBWYrm 4926
-VPMOVSXBWYrr 4927
-VPMOVSXBWZ 4928
-VPMOVSXBWZrm 4929
-VPMOVSXBWZrmk 4930
-VPMOVSXBWZrmkz 4931
-VPMOVSXBWZrr 4932
-VPMOVSXBWZrrk 4933
-VPMOVSXBWZrrkz 4934
-VPMOVSXBWrm 4935
-VPMOVSXBWrr 4936
-VPMOVSXDQYrm 4937
-VPMOVSXDQYrr 4938
-VPMOVSXDQZ 4939
-VPMOVSXDQZrm 4940
-VPMOVSXDQZrmk 4941
-VPMOVSXDQZrmkz 4942
-VPMOVSXDQZrr 4943
-VPMOVSXDQZrrk 4944
-VPMOVSXDQZrrkz 4945
-VPMOVSXDQrm 4946
-VPMOVSXDQrr 4947
-VPMOVSXWDYrm 4948
-VPMOVSXWDYrr 4949
-VPMOVSXWDZ 4950
-VPMOVSXWDZrm 4951
-VPMOVSXWDZrmk 4952
-VPMOVSXWDZrmkz 4953
-VPMOVSXWDZrr 4954
-VPMOVSXWDZrrk 4955
-VPMOVSXWDZrrkz 4956
-VPMOVSXWDrm 4957
-VPMOVSXWDrr 4958
-VPMOVSXWQYrm 4959
-VPMOVSXWQYrr 4960
-VPMOVSXWQZ 4961
-VPMOVSXWQZrm 4962
-VPMOVSXWQZrmk 4963
-VPMOVSXWQZrmkz 4964
-VPMOVSXWQZrr 4965
-VPMOVSXWQZrrk 4966
-VPMOVSXWQZrrkz 4967
-VPMOVSXWQrm 4968
-VPMOVSXWQrr 4969
-VPMOVUSDBZ 4970
-VPMOVUSDBZmr 4971
-VPMOVUSDBZmrk 4972
-VPMOVUSDBZrr 4973
-VPMOVUSDBZrrk 4974
-VPMOVUSDBZrrkz 4975
-VPMOVUSDWZ 4976
-VPMOVUSDWZmr 4977
-VPMOVUSDWZmrk 4978
-VPMOVUSDWZrr 4979
-VPMOVUSDWZrrk 4980
-VPMOVUSDWZrrkz 4981
-VPMOVUSQBZ 4982
-VPMOVUSQBZmr 4983
-VPMOVUSQBZmrk 4984
-VPMOVUSQBZrr 4985
-VPMOVUSQBZrrk 4986
-VPMOVUSQBZrrkz 4987
-VPMOVUSQDZ 4988
-VPMOVUSQDZmr 4989
-VPMOVUSQDZmrk 4990
-VPMOVUSQDZrr 4991
-VPMOVUSQDZrrk 4992
-VPMOVUSQDZrrkz 4993
-VPMOVUSQWZ 4994
-VPMOVUSQWZmr 4995
-VPMOVUSQWZmrk 4996
-VPMOVUSQWZrr 4997
-VPMOVUSQWZrrk 4998
-VPMOVUSQWZrrkz 4999
-VPMOVUSWBZ 5000
-VPMOVUSWBZmr 5001
-VPMOVUSWBZmrk 5002
-VPMOVUSWBZrr 5003
-VPMOVUSWBZrrk 5004
-VPMOVUSWBZrrkz 5005
-VPMOVW 5006
-VPMOVWBZ 5007
-VPMOVWBZmr 5008
-VPMOVWBZmrk 5009
-VPMOVWBZrr 5010
-VPMOVWBZrrk 5011
-VPMOVWBZrrkz 5012
-VPMOVZXBDYrm 5013
-VPMOVZXBDYrr 5014
-VPMOVZXBDZ 5015
-VPMOVZXBDZrm 5016
-VPMOVZXBDZrmk 5017
-VPMOVZXBDZrmkz 5018
-VPMOVZXBDZrr 5019
-VPMOVZXBDZrrk 5020
-VPMOVZXBDZrrkz 5021
-VPMOVZXBDrm 5022
-VPMOVZXBDrr 5023
-VPMOVZXBQYrm 5024
-VPMOVZXBQYrr 5025
-VPMOVZXBQZ 5026
-VPMOVZXBQZrm 5027
-VPMOVZXBQZrmk 5028
-VPMOVZXBQZrmkz 5029
-VPMOVZXBQZrr 5030
-VPMOVZXBQZrrk 5031
-VPMOVZXBQZrrkz 5032
-VPMOVZXBQrm 5033
-VPMOVZXBQrr 5034
-VPMOVZXBWYrm 5035
-VPMOVZXBWYrr 5036
-VPMOVZXBWZ 5037
-VPMOVZXBWZrm 5038
-VPMOVZXBWZrmk 5039
-VPMOVZXBWZrmkz 5040
-VPMOVZXBWZrr 5041
-VPMOVZXBWZrrk 5042
-VPMOVZXBWZrrkz 5043
-VPMOVZXBWrm 5044
-VPMOVZXBWrr 5045
-VPMOVZXDQYrm 5046
-VPMOVZXDQYrr 5047
-VPMOVZXDQZ 5048
-VPMOVZXDQZrm 5049
-VPMOVZXDQZrmk 5050
-VPMOVZXDQZrmkz 5051
-VPMOVZXDQZrr 5052
-VPMOVZXDQZrrk 5053
-VPMOVZXDQZrrkz 5054
-VPMOVZXDQrm 5055
-VPMOVZXDQrr 5056
-VPMOVZXWDYrm 5057
-VPMOVZXWDYrr 5058
-VPMOVZXWDZ 5059
-VPMOVZXWDZrm 5060
-VPMOVZXWDZrmk 5061
-VPMOVZXWDZrmkz 5062
-VPMOVZXWDZrr 5063
-VPMOVZXWDZrrk 5064
-VPMOVZXWDZrrkz 5065
-VPMOVZXWDrm 5066
-VPMOVZXWDrr 5067
-VPMOVZXWQYrm 5068
-VPMOVZXWQYrr 5069
-VPMOVZXWQZ 5070
-VPMOVZXWQZrm 5071
-VPMOVZXWQZrmk 5072
-VPMOVZXWQZrmkz 5073
-VPMOVZXWQZrr 5074
-VPMOVZXWQZrrk 5075
-VPMOVZXWQZrrkz 5076
-VPMOVZXWQrm 5077
-VPMOVZXWQrr 5078
-VPMULDQYrm 5079
-VPMULDQYrr 5080
-VPMULDQZ 5081
-VPMULDQZrm 5082
-VPMULDQZrmb 5083
-VPMULDQZrmbk 5084
-VPMULDQZrmbkz 5085
-VPMULDQZrmk 5086
-VPMULDQZrmkz 5087
-VPMULDQZrr 5088
-VPMULDQZrrk 5089
-VPMULDQZrrkz 5090
-VPMULDQrm 5091
-VPMULDQrr 5092
-VPMULHRSWYrm 5093
-VPMULHRSWYrr 5094
-VPMULHRSWZ 5095
-VPMULHRSWZrm 5096
-VPMULHRSWZrmk 5097
-VPMULHRSWZrmkz 5098
-VPMULHRSWZrr 5099
-VPMULHRSWZrrk 5100
-VPMULHRSWZrrkz 5101
-VPMULHRSWrm 5102
-VPMULHRSWrr 5103
-VPMULHUWYrm 5104
-VPMULHUWYrr 5105
-VPMULHUWZ 5106
-VPMULHUWZrm 5107
-VPMULHUWZrmk 5108
-VPMULHUWZrmkz 5109
-VPMULHUWZrr 5110
-VPMULHUWZrrk 5111
-VPMULHUWZrrkz 5112
-VPMULHUWrm 5113
-VPMULHUWrr 5114
-VPMULHWYrm 5115
-VPMULHWYrr 5116
-VPMULHWZ 5117
-VPMULHWZrm 5118
-VPMULHWZrmk 5119
-VPMULHWZrmkz 5120
-VPMULHWZrr 5121
-VPMULHWZrrk 5122
-VPMULHWZrrkz 5123
-VPMULHWrm 5124
-VPMULHWrr 5125
-VPMULLDYrm 5126
-VPMULLDYrr 5127
-VPMULLDZ 5128
-VPMULLDZrm 5129
-VPMULLDZrmb 5130
-VPMULLDZrmbk 5131
-VPMULLDZrmbkz 5132
-VPMULLDZrmk 5133
-VPMULLDZrmkz 5134
-VPMULLDZrr 5135
-VPMULLDZrrk 5136
-VPMULLDZrrkz 5137
-VPMULLDrm 5138
-VPMULLDrr 5139
-VPMULLQZ 5140
-VPMULLQZrm 5141
-VPMULLQZrmb 5142
-VPMULLQZrmbk 5143
-VPMULLQZrmbkz 5144
-VPMULLQZrmk 5145
-VPMULLQZrmkz 5146
-VPMULLQZrr 5147
-VPMULLQZrrk 5148
-VPMULLQZrrkz 5149
-VPMULLWYrm 5150
-VPMULLWYrr 5151
-VPMULLWZ 5152
-VPMULLWZrm 5153
-VPMULLWZrmk 5154
-VPMULLWZrmkz 5155
-VPMULLWZrr 5156
-VPMULLWZrrk 5157
-VPMULLWZrrkz 5158
-VPMULLWrm 5159
-VPMULLWrr 5160
-VPMULTISHIFTQBZ 5161
-VPMULTISHIFTQBZrm 5162
-VPMULTISHIFTQBZrmb 5163
-VPMULTISHIFTQBZrmbk 5164
-VPMULTISHIFTQBZrmbkz 5165
-VPMULTISHIFTQBZrmk 5166
-VPMULTISHIFTQBZrmkz 5167
-VPMULTISHIFTQBZrr 5168
-VPMULTISHIFTQBZrrk 5169
-VPMULTISHIFTQBZrrkz 5170
-VPMULUDQYrm 5171
-VPMULUDQYrr 5172
-VPMULUDQZ 5173
-VPMULUDQZrm 5174
-VPMULUDQZrmb 5175
-VPMULUDQZrmbk 5176
-VPMULUDQZrmbkz 5177
-VPMULUDQZrmk 5178
-VPMULUDQZrmkz 5179
-VPMULUDQZrr 5180
-VPMULUDQZrrk 5181
-VPMULUDQZrrkz 5182
-VPMULUDQrm 5183
-VPMULUDQrr 5184
-VPOPCNTBZ 5185
-VPOPCNTBZrm 5186
-VPOPCNTBZrmk 5187
-VPOPCNTBZrmkz 5188
-VPOPCNTBZrr 5189
-VPOPCNTBZrrk 5190
-VPOPCNTBZrrkz 5191
-VPOPCNTDZ 5192
-VPOPCNTDZrm 5193
-VPOPCNTDZrmb 5194
-VPOPCNTDZrmbk 5195
-VPOPCNTDZrmbkz 5196
-VPOPCNTDZrmk 5197
-VPOPCNTDZrmkz 5198
-VPOPCNTDZrr 5199
-VPOPCNTDZrrk 5200
-VPOPCNTDZrrkz 5201
-VPOPCNTQZ 5202
-VPOPCNTQZrm 5203
-VPOPCNTQZrmb 5204
-VPOPCNTQZrmbk 5205
-VPOPCNTQZrmbkz 5206
-VPOPCNTQZrmk 5207
-VPOPCNTQZrmkz 5208
-VPOPCNTQZrr 5209
-VPOPCNTQZrrk 5210
-VPOPCNTQZrrkz 5211
-VPOPCNTWZ 5212
-VPOPCNTWZrm 5213
-VPOPCNTWZrmk 5214
-VPOPCNTWZrmkz 5215
-VPOPCNTWZrr 5216
-VPOPCNTWZrrk 5217
-VPOPCNTWZrrkz 5218
-VPORDZ 5219
-VPORDZrm 5220
-VPORDZrmb 5221
-VPORDZrmbk 5222
-VPORDZrmbkz 5223
-VPORDZrmk 5224
-VPORDZrmkz 5225
-VPORDZrr 5226
-VPORDZrrk 5227
-VPORDZrrkz 5228
-VPORQZ 5229
-VPORQZrm 5230
-VPORQZrmb 5231
-VPORQZrmbk 5232
-VPORQZrmbkz 5233
-VPORQZrmk 5234
-VPORQZrmkz 5235
-VPORQZrr 5236
-VPORQZrrk 5237
-VPORQZrrkz 5238
-VPORYrm 5239
-VPORYrr 5240
-VPORrm 5241
-VPORrr 5242
-VPPERMrmr 5243
-VPPERMrrm 5244
-VPPERMrrr 5245
-VPPERMrrr_REV 5246
-VPROLDZ 5247
-VPROLDZmbi 5248
-VPROLDZmbik 5249
-VPROLDZmbikz 5250
-VPROLDZmi 5251
-VPROLDZmik 5252
-VPROLDZmikz 5253
-VPROLDZri 5254
-VPROLDZrik 5255
-VPROLDZrikz 5256
-VPROLQZ 5257
-VPROLQZmbi 5258
-VPROLQZmbik 5259
-VPROLQZmbikz 5260
-VPROLQZmi 5261
-VPROLQZmik 5262
-VPROLQZmikz 5263
-VPROLQZri 5264
-VPROLQZrik 5265
-VPROLQZrikz 5266
-VPROLVDZ 5267
-VPROLVDZrm 5268
-VPROLVDZrmb 5269
-VPROLVDZrmbk 5270
-VPROLVDZrmbkz 5271
-VPROLVDZrmk 5272
-VPROLVDZrmkz 5273
-VPROLVDZrr 5274
-VPROLVDZrrk 5275
-VPROLVDZrrkz 5276
-VPROLVQZ 5277
-VPROLVQZrm 5278
-VPROLVQZrmb 5279
-VPROLVQZrmbk 5280
-VPROLVQZrmbkz 5281
-VPROLVQZrmk 5282
-VPROLVQZrmkz 5283
-VPROLVQZrr 5284
-VPROLVQZrrk 5285
-VPROLVQZrrkz 5286
-VPRORDZ 5287
-VPRORDZmbi 5288
-VPRORDZmbik 5289
-VPRORDZmbikz 5290
-VPRORDZmi 5291
-VPRORDZmik 5292
-VPRORDZmikz 5293
-VPRORDZri 5294
-VPRORDZrik 5295
-VPRORDZrikz 5296
-VPRORQZ 5297
-VPRORQZmbi 5298
-VPRORQZmbik 5299
-VPRORQZmbikz 5300
-VPRORQZmi 5301
-VPRORQZmik 5302
-VPRORQZmikz 5303
-VPRORQZri 5304
-VPRORQZrik 5305
-VPRORQZrikz 5306
-VPRORVDZ 5307
-VPRORVDZrm 5308
-VPRORVDZrmb 5309
-VPRORVDZrmbk 5310
-VPRORVDZrmbkz 5311
-VPRORVDZrmk 5312
-VPRORVDZrmkz 5313
-VPRORVDZrr 5314
-VPRORVDZrrk 5315
-VPRORVDZrrkz 5316
-VPRORVQZ 5317
-VPRORVQZrm 5318
-VPRORVQZrmb 5319
-VPRORVQZrmbk 5320
-VPRORVQZrmbkz 5321
-VPRORVQZrmk 5322
-VPRORVQZrmkz 5323
-VPRORVQZrr 5324
-VPRORVQZrrk 5325
-VPRORVQZrrkz 5326
-VPROTBmi 5327
-VPROTBmr 5328
-VPROTBri 5329
-VPROTBrm 5330
-VPROTBrr 5331
-VPROTBrr_REV 5332
-VPROTDmi 5333
-VPROTDmr 5334
-VPROTDri 5335
-VPROTDrm 5336
-VPROTDrr 5337
-VPROTDrr_REV 5338
-VPROTQmi 5339
-VPROTQmr 5340
-VPROTQri 5341
-VPROTQrm 5342
-VPROTQrr 5343
-VPROTQrr_REV 5344
-VPROTWmi 5345
-VPROTWmr 5346
-VPROTWri 5347
-VPROTWrm 5348
-VPROTWrr 5349
-VPROTWrr_REV 5350
-VPSADBWYrm 5351
-VPSADBWYrr 5352
-VPSADBWZ 5353
-VPSADBWZrm 5354
-VPSADBWZrr 5355
-VPSADBWrm 5356
-VPSADBWrr 5357
-VPSCATTERDDZ 5358
-VPSCATTERDDZmr 5359
-VPSCATTERDQZ 5360
-VPSCATTERDQZmr 5361
-VPSCATTERQDZ 5362
-VPSCATTERQDZmr 5363
-VPSCATTERQQZ 5364
-VPSCATTERQQZmr 5365
-VPSHABmr 5366
-VPSHABrm 5367
-VPSHABrr 5368
-VPSHABrr_REV 5369
-VPSHADmr 5370
-VPSHADrm 5371
-VPSHADrr 5372
-VPSHADrr_REV 5373
-VPSHAQmr 5374
-VPSHAQrm 5375
-VPSHAQrr 5376
-VPSHAQrr_REV 5377
-VPSHAWmr 5378
-VPSHAWrm 5379
-VPSHAWrr 5380
-VPSHAWrr_REV 5381
-VPSHLBmr 5382
-VPSHLBrm 5383
-VPSHLBrr 5384
-VPSHLBrr_REV 5385
-VPSHLDDZ 5386
-VPSHLDDZrmbi 5387
-VPSHLDDZrmbik 5388
-VPSHLDDZrmbikz 5389
-VPSHLDDZrmi 5390
-VPSHLDDZrmik 5391
-VPSHLDDZrmikz 5392
-VPSHLDDZrri 5393
-VPSHLDDZrrik 5394
-VPSHLDDZrrikz 5395
-VPSHLDQZ 5396
-VPSHLDQZrmbi 5397
-VPSHLDQZrmbik 5398
-VPSHLDQZrmbikz 5399
-VPSHLDQZrmi 5400
-VPSHLDQZrmik 5401
-VPSHLDQZrmikz 5402
-VPSHLDQZrri 5403
-VPSHLDQZrrik 5404
-VPSHLDQZrrikz 5405
-VPSHLDVDZ 5406
-VPSHLDVDZm 5407
-VPSHLDVDZmb 5408
-VPSHLDVDZmbk 5409
-VPSHLDVDZmbkz 5410
-VPSHLDVDZmk 5411
-VPSHLDVDZmkz 5412
-VPSHLDVDZr 5413
-VPSHLDVDZrk 5414
-VPSHLDVDZrkz 5415
-VPSHLDVQZ 5416
-VPSHLDVQZm 5417
-VPSHLDVQZmb 5418
-VPSHLDVQZmbk 5419
-VPSHLDVQZmbkz 5420
-VPSHLDVQZmk 5421
-VPSHLDVQZmkz 5422
-VPSHLDVQZr 5423
-VPSHLDVQZrk 5424
-VPSHLDVQZrkz 5425
-VPSHLDVWZ 5426
-VPSHLDVWZm 5427
-VPSHLDVWZmk 5428
-VPSHLDVWZmkz 5429
-VPSHLDVWZr 5430
-VPSHLDVWZrk 5431
-VPSHLDVWZrkz 5432
-VPSHLDWZ 5433
-VPSHLDWZrmi 5434
-VPSHLDWZrmik 5435
-VPSHLDWZrmikz 5436
-VPSHLDWZrri 5437
-VPSHLDWZrrik 5438
-VPSHLDWZrrikz 5439
-VPSHLDmr 5440
-VPSHLDrm 5441
-VPSHLDrr 5442
-VPSHLDrr_REV 5443
-VPSHLQmr 5444
-VPSHLQrm 5445
-VPSHLQrr 5446
-VPSHLQrr_REV 5447
-VPSHLWmr 5448
-VPSHLWrm 5449
-VPSHLWrr 5450
-VPSHLWrr_REV 5451
-VPSHRDDZ 5452
-VPSHRDDZrmbi 5453
-VPSHRDDZrmbik 5454
-VPSHRDDZrmbikz 5455
-VPSHRDDZrmi 5456
-VPSHRDDZrmik 5457
-VPSHRDDZrmikz 5458
-VPSHRDDZrri 5459
-VPSHRDDZrrik 5460
-VPSHRDDZrrikz 5461
-VPSHRDQZ 5462
-VPSHRDQZrmbi 5463
-VPSHRDQZrmbik 5464
-VPSHRDQZrmbikz 5465
-VPSHRDQZrmi 5466
-VPSHRDQZrmik 5467
-VPSHRDQZrmikz 5468
-VPSHRDQZrri 5469
-VPSHRDQZrrik 5470
-VPSHRDQZrrikz 5471
-VPSHRDVDZ 5472
-VPSHRDVDZm 5473
-VPSHRDVDZmb 5474
-VPSHRDVDZmbk 5475
-VPSHRDVDZmbkz 5476
-VPSHRDVDZmk 5477
-VPSHRDVDZmkz 5478
-VPSHRDVDZr 5479
-VPSHRDVDZrk 5480
-VPSHRDVDZrkz 5481
-VPSHRDVQZ 5482
-VPSHRDVQZm 5483
-VPSHRDVQZmb 5484
-VPSHRDVQZmbk 5485
-VPSHRDVQZmbkz 5486
-VPSHRDVQZmk 5487
-VPSHRDVQZmkz 5488
-VPSHRDVQZr 5489
-VPSHRDVQZrk 5490
-VPSHRDVQZrkz 5491
-VPSHRDVWZ 5492
-VPSHRDVWZm 5493
-VPSHRDVWZmk 5494
-VPSHRDVWZmkz 5495
-VPSHRDVWZr 5496
-VPSHRDVWZrk 5497
-VPSHRDVWZrkz 5498
-VPSHRDWZ 5499
-VPSHRDWZrmi 5500
-VPSHRDWZrmik 5501
-VPSHRDWZrmikz 5502
-VPSHRDWZrri 5503
-VPSHRDWZrrik 5504
-VPSHRDWZrrikz 5505
-VPSHUFBITQMBZ 5506
-VPSHUFBITQMBZrm 5507
-VPSHUFBITQMBZrmk 5508
-VPSHUFBITQMBZrr 5509
-VPSHUFBITQMBZrrk 5510
-VPSHUFBYrm 5511
-VPSHUFBYrr 5512
-VPSHUFBZ 5513
-VPSHUFBZrm 5514
-VPSHUFBZrmk 5515
-VPSHUFBZrmkz 5516
-VPSHUFBZrr 5517
-VPSHUFBZrrk 5518
-VPSHUFBZrrkz 5519
-VPSHUFBrm 5520
-VPSHUFBrr 5521
-VPSHUFDYmi 5522
-VPSHUFDYri 5523
-VPSHUFDZ 5524
-VPSHUFDZmbi 5525
-VPSHUFDZmbik 5526
-VPSHUFDZmbikz 5527
-VPSHUFDZmi 5528
-VPSHUFDZmik 5529
-VPSHUFDZmikz 5530
-VPSHUFDZri 5531
-VPSHUFDZrik 5532
-VPSHUFDZrikz 5533
-VPSHUFDmi 5534
-VPSHUFDri 5535
-VPSHUFHWYmi 5536
-VPSHUFHWYri 5537
-VPSHUFHWZ 5538
-VPSHUFHWZmi 5539
-VPSHUFHWZmik 5540
-VPSHUFHWZmikz 5541
-VPSHUFHWZri 5542
-VPSHUFHWZrik 5543
-VPSHUFHWZrikz 5544
-VPSHUFHWmi 5545
-VPSHUFHWri 5546
-VPSHUFLWYmi 5547
-VPSHUFLWYri 5548
-VPSHUFLWZ 5549
-VPSHUFLWZmi 5550
-VPSHUFLWZmik 5551
-VPSHUFLWZmikz 5552
-VPSHUFLWZri 5553
-VPSHUFLWZrik 5554
-VPSHUFLWZrikz 5555
-VPSHUFLWmi 5556
-VPSHUFLWri 5557
-VPSIGNBYrm 5558
-VPSIGNBYrr 5559
-VPSIGNBrm 5560
-VPSIGNBrr 5561
-VPSIGNDYrm 5562
-VPSIGNDYrr 5563
-VPSIGNDrm 5564
-VPSIGNDrr 5565
-VPSIGNWYrm 5566
-VPSIGNWYrr 5567
-VPSIGNWrm 5568
-VPSIGNWrr 5569
-VPSLLDQYri 5570
-VPSLLDQZ 5571
-VPSLLDQZmi 5572
-VPSLLDQZri 5573
-VPSLLDQri 5574
-VPSLLDYri 5575
-VPSLLDYrm 5576
-VPSLLDYrr 5577
-VPSLLDZ 5578
-VPSLLDZmbi 5579
-VPSLLDZmbik 5580
-VPSLLDZmbikz 5581
-VPSLLDZmi 5582
-VPSLLDZmik 5583
-VPSLLDZmikz 5584
-VPSLLDZri 5585
-VPSLLDZrik 5586
-VPSLLDZrikz 5587
-VPSLLDZrm 5588
-VPSLLDZrmk 5589
-VPSLLDZrmkz 5590
-VPSLLDZrr 5591
-VPSLLDZrrk 5592
-VPSLLDZrrkz 5593
-VPSLLDri 5594
-VPSLLDrm 5595
-VPSLLDrr 5596
-VPSLLQYri 5597
-VPSLLQYrm 5598
-VPSLLQYrr 5599
-VPSLLQZ 5600
-VPSLLQZmbi 5601
-VPSLLQZmbik 5602
-VPSLLQZmbikz 5603
-VPSLLQZmi 5604
-VPSLLQZmik 5605
-VPSLLQZmikz 5606
-VPSLLQZri 5607
-VPSLLQZrik 5608
-VPSLLQZrikz 5609
-VPSLLQZrm 5610
-VPSLLQZrmk 5611
-VPSLLQZrmkz 5612
-VPSLLQZrr 5613
-VPSLLQZrrk 5614
-VPSLLQZrrkz 5615
-VPSLLQri 5616
-VPSLLQrm 5617
-VPSLLQrr 5618
-VPSLLVDYrm 5619
-VPSLLVDYrr 5620
-VPSLLVDZ 5621
-VPSLLVDZrm 5622
-VPSLLVDZrmb 5623
-VPSLLVDZrmbk 5624
-VPSLLVDZrmbkz 5625
-VPSLLVDZrmk 5626
-VPSLLVDZrmkz 5627
-VPSLLVDZrr 5628
-VPSLLVDZrrk 5629
-VPSLLVDZrrkz 5630
-VPSLLVDrm 5631
-VPSLLVDrr 5632
-VPSLLVQYrm 5633
-VPSLLVQYrr 5634
-VPSLLVQZ 5635
-VPSLLVQZrm 5636
-VPSLLVQZrmb 5637
-VPSLLVQZrmbk 5638
-VPSLLVQZrmbkz 5639
-VPSLLVQZrmk 5640
-VPSLLVQZrmkz 5641
-VPSLLVQZrr 5642
-VPSLLVQZrrk 5643
-VPSLLVQZrrkz 5644
-VPSLLVQrm 5645
-VPSLLVQrr 5646
-VPSLLVWZ 5647
-VPSLLVWZrm 5648
-VPSLLVWZrmk 5649
-VPSLLVWZrmkz 5650
-VPSLLVWZrr 5651
-VPSLLVWZrrk 5652
-VPSLLVWZrrkz 5653
-VPSLLWYri 5654
-VPSLLWYrm 5655
-VPSLLWYrr 5656
-VPSLLWZ 5657
-VPSLLWZmi 5658
-VPSLLWZmik 5659
-VPSLLWZmikz 5660
-VPSLLWZri 5661
-VPSLLWZrik 5662
-VPSLLWZrikz 5663
-VPSLLWZrm 5664
-VPSLLWZrmk 5665
-VPSLLWZrmkz 5666
-VPSLLWZrr 5667
-VPSLLWZrrk 5668
-VPSLLWZrrkz 5669
-VPSLLWri 5670
-VPSLLWrm 5671
-VPSLLWrr 5672
-VPSRADYri 5673
-VPSRADYrm 5674
-VPSRADYrr 5675
-VPSRADZ 5676
-VPSRADZmbi 5677
-VPSRADZmbik 5678
-VPSRADZmbikz 5679
-VPSRADZmi 5680
-VPSRADZmik 5681
-VPSRADZmikz 5682
-VPSRADZri 5683
-VPSRADZrik 5684
-VPSRADZrikz 5685
-VPSRADZrm 5686
-VPSRADZrmk 5687
-VPSRADZrmkz 5688
-VPSRADZrr 5689
-VPSRADZrrk 5690
-VPSRADZrrkz 5691
-VPSRADri 5692
-VPSRADrm 5693
-VPSRADrr 5694
-VPSRAQZ 5695
-VPSRAQZmbi 5696
-VPSRAQZmbik 5697
-VPSRAQZmbikz 5698
-VPSRAQZmi 5699
-VPSRAQZmik 5700
-VPSRAQZmikz 5701
-VPSRAQZri 5702
-VPSRAQZrik 5703
-VPSRAQZrikz 5704
-VPSRAQZrm 5705
-VPSRAQZrmk 5706
-VPSRAQZrmkz 5707
-VPSRAQZrr 5708
-VPSRAQZrrk 5709
-VPSRAQZrrkz 5710
-VPSRAVDYrm 5711
-VPSRAVDYrr 5712
-VPSRAVDZ 5713
-VPSRAVDZrm 5714
-VPSRAVDZrmb 5715
-VPSRAVDZrmbk 5716
-VPSRAVDZrmbkz 5717
-VPSRAVDZrmk 5718
-VPSRAVDZrmkz 5719
-VPSRAVDZrr 5720
-VPSRAVDZrrk 5721
-VPSRAVDZrrkz 5722
-VPSRAVDrm 5723
-VPSRAVDrr 5724
-VPSRAVQZ 5725
-VPSRAVQZrm 5726
-VPSRAVQZrmb 5727
-VPSRAVQZrmbk 5728
-VPSRAVQZrmbkz 5729
-VPSRAVQZrmk 5730
-VPSRAVQZrmkz 5731
-VPSRAVQZrr 5732
-VPSRAVQZrrk 5733
-VPSRAVQZrrkz 5734
-VPSRAVWZ 5735
-VPSRAVWZrm 5736
-VPSRAVWZrmk 5737
-VPSRAVWZrmkz 5738
-VPSRAVWZrr 5739
-VPSRAVWZrrk 5740
-VPSRAVWZrrkz 5741
-VPSRAWYri 5742
-VPSRAWYrm 5743
-VPSRAWYrr 5744
-VPSRAWZ 5745
-VPSRAWZmi 5746
-VPSRAWZmik 5747
-VPSRAWZmikz 5748
-VPSRAWZri 5749
-VPSRAWZrik 5750
-VPSRAWZrikz 5751
-VPSRAWZrm 5752
-VPSRAWZrmk 5753
-VPSRAWZrmkz 5754
-VPSRAWZrr 5755
-VPSRAWZrrk 5756
-VPSRAWZrrkz 5757
-VPSRAWri 5758
-VPSRAWrm 5759
-VPSRAWrr 5760
-VPSRLDQYri 5761
-VPSRLDQZ 5762
-VPSRLDQZmi 5763
-VPSRLDQZri 5764
-VPSRLDQri 5765
-VPSRLDYri 5766
-VPSRLDYrm 5767
-VPSRLDYrr 5768
-VPSRLDZ 5769
-VPSRLDZmbi 5770
-VPSRLDZmbik 5771
-VPSRLDZmbikz 5772
-VPSRLDZmi 5773
-VPSRLDZmik 5774
-VPSRLDZmikz 5775
-VPSRLDZri 5776
-VPSRLDZrik 5777
-VPSRLDZrikz 5778
-VPSRLDZrm 5779
-VPSRLDZrmk 5780
-VPSRLDZrmkz 5781
-VPSRLDZrr 5782
-VPSRLDZrrk 5783
-VPSRLDZrrkz 5784
-VPSRLDri 5785
-VPSRLDrm 5786
-VPSRLDrr 5787
-VPSRLQYri 5788
-VPSRLQYrm 5789
-VPSRLQYrr 5790
-VPSRLQZ 5791
-VPSRLQZmbi 5792
-VPSRLQZmbik 5793
-VPSRLQZmbikz 5794
-VPSRLQZmi 5795
-VPSRLQZmik 5796
-VPSRLQZmikz 5797
-VPSRLQZri 5798
-VPSRLQZrik 5799
-VPSRLQZrikz 5800
-VPSRLQZrm 5801
-VPSRLQZrmk 5802
-VPSRLQZrmkz 5803
-VPSRLQZrr 5804
-VPSRLQZrrk 5805
-VPSRLQZrrkz 5806
-VPSRLQri 5807
-VPSRLQrm 5808
-VPSRLQrr 5809
-VPSRLVDYrm 5810
-VPSRLVDYrr 5811
-VPSRLVDZ 5812
-VPSRLVDZrm 5813
-VPSRLVDZrmb 5814
-VPSRLVDZrmbk 5815
-VPSRLVDZrmbkz 5816
-VPSRLVDZrmk 5817
-VPSRLVDZrmkz 5818
-VPSRLVDZrr 5819
-VPSRLVDZrrk 5820
-VPSRLVDZrrkz 5821
-VPSRLVDrm 5822
-VPSRLVDrr 5823
-VPSRLVQYrm 5824
-VPSRLVQYrr 5825
-VPSRLVQZ 5826
-VPSRLVQZrm 5827
-VPSRLVQZrmb 5828
-VPSRLVQZrmbk 5829
-VPSRLVQZrmbkz 5830
-VPSRLVQZrmk 5831
-VPSRLVQZrmkz 5832
-VPSRLVQZrr 5833
-VPSRLVQZrrk 5834
-VPSRLVQZrrkz 5835
-VPSRLVQrm 5836
-VPSRLVQrr 5837
-VPSRLVWZ 5838
-VPSRLVWZrm 5839
-VPSRLVWZrmk 5840
-VPSRLVWZrmkz 5841
-VPSRLVWZrr 5842
-VPSRLVWZrrk 5843
-VPSRLVWZrrkz 5844
-VPSRLWYri 5845
-VPSRLWYrm 5846
-VPSRLWYrr 5847
-VPSRLWZ 5848
-VPSRLWZmi 5849
-VPSRLWZmik 5850
-VPSRLWZmikz 5851
-VPSRLWZri 5852
-VPSRLWZrik 5853
-VPSRLWZrikz 5854
-VPSRLWZrm 5855
-VPSRLWZrmk 5856
-VPSRLWZrmkz 5857
-VPSRLWZrr 5858
-VPSRLWZrrk 5859
-VPSRLWZrrkz 5860
-VPSRLWri 5861
-VPSRLWrm 5862
-VPSRLWrr 5863
-VPSUBBYrm 5864
-VPSUBBYrr 5865
-VPSUBBZ 5866
-VPSUBBZrm 5867
-VPSUBBZrmk 5868
-VPSUBBZrmkz 5869
-VPSUBBZrr 5870
-VPSUBBZrrk 5871
-VPSUBBZrrkz 5872
-VPSUBBrm 5873
-VPSUBBrr 5874
-VPSUBDYrm 5875
-VPSUBDYrr 5876
-VPSUBDZ 5877
-VPSUBDZrm 5878
-VPSUBDZrmb 5879
-VPSUBDZrmbk 5880
-VPSUBDZrmbkz 5881
-VPSUBDZrmk 5882
-VPSUBDZrmkz 5883
-VPSUBDZrr 5884
-VPSUBDZrrk 5885
-VPSUBDZrrkz 5886
-VPSUBDrm 5887
-VPSUBDrr 5888
-VPSUBQYrm 5889
-VPSUBQYrr 5890
-VPSUBQZ 5891
-VPSUBQZrm 5892
-VPSUBQZrmb 5893
-VPSUBQZrmbk 5894
-VPSUBQZrmbkz 5895
-VPSUBQZrmk 5896
-VPSUBQZrmkz 5897
-VPSUBQZrr 5898
-VPSUBQZrrk 5899
-VPSUBQZrrkz 5900
-VPSUBQrm 5901
-VPSUBQrr 5902
-VPSUBSBYrm 5903
-VPSUBSBYrr 5904
-VPSUBSBZ 5905
-VPSUBSBZrm 5906
-VPSUBSBZrmk 5907
-VPSUBSBZrmkz 5908
-VPSUBSBZrr 5909
-VPSUBSBZrrk 5910
-VPSUBSBZrrkz 5911
-VPSUBSBrm 5912
-VPSUBSBrr 5913
-VPSUBSWYrm 5914
-VPSUBSWYrr 5915
-VPSUBSWZ 5916
-VPSUBSWZrm 5917
-VPSUBSWZrmk 5918
-VPSUBSWZrmkz 5919
-VPSUBSWZrr 5920
-VPSUBSWZrrk 5921
-VPSUBSWZrrkz 5922
-VPSUBSWrm 5923
-VPSUBSWrr 5924
-VPSUBUSBYrm 5925
-VPSUBUSBYrr 5926
-VPSUBUSBZ 5927
-VPSUBUSBZrm 5928
-VPSUBUSBZrmk 5929
-VPSUBUSBZrmkz 5930
-VPSUBUSBZrr 5931
-VPSUBUSBZrrk 5932
-VPSUBUSBZrrkz 5933
-VPSUBUSBrm 5934
-VPSUBUSBrr 5935
-VPSUBUSWYrm 5936
-VPSUBUSWYrr 5937
-VPSUBUSWZ 5938
-VPSUBUSWZrm 5939
-VPSUBUSWZrmk 5940
-VPSUBUSWZrmkz 5941
-VPSUBUSWZrr 5942
-VPSUBUSWZrrk 5943
-VPSUBUSWZrrkz 5944
-VPSUBUSWrm 5945
-VPSUBUSWrr 5946
-VPSUBWYrm 5947
-VPSUBWYrr 5948
-VPSUBWZ 5949
-VPSUBWZrm 5950
-VPSUBWZrmk 5951
-VPSUBWZrmkz 5952
-VPSUBWZrr 5953
-VPSUBWZrrk 5954
-VPSUBWZrrkz 5955
-VPSUBWrm 5956
-VPSUBWrr 5957
-VPTERNLOGDZ 5958
-VPTERNLOGDZrmbi 5959
-VPTERNLOGDZrmbik 5960
-VPTERNLOGDZrmbikz 5961
-VPTERNLOGDZrmi 5962
-VPTERNLOGDZrmik 5963
-VPTERNLOGDZrmikz 5964
-VPTERNLOGDZrri 5965
-VPTERNLOGDZrrik 5966
-VPTERNLOGDZrrikz 5967
-VPTERNLOGQZ 5968
-VPTERNLOGQZrmbi 5969
-VPTERNLOGQZrmbik 5970
-VPTERNLOGQZrmbikz 5971
-VPTERNLOGQZrmi 5972
-VPTERNLOGQZrmik 5973
-VPTERNLOGQZrmikz 5974
-VPTERNLOGQZrri 5975
-VPTERNLOGQZrrik 5976
-VPTERNLOGQZrrikz 5977
-VPTESTMBZ 5978
-VPTESTMBZrm 5979
-VPTESTMBZrmk 5980
-VPTESTMBZrr 5981
-VPTESTMBZrrk 5982
-VPTESTMDZ 5983
-VPTESTMDZrm 5984
-VPTESTMDZrmb 5985
-VPTESTMDZrmbk 5986
-VPTESTMDZrmk 5987
-VPTESTMDZrr 5988
-VPTESTMDZrrk 5989
-VPTESTMQZ 5990
-VPTESTMQZrm 5991
-VPTESTMQZrmb 5992
-VPTESTMQZrmbk 5993
-VPTESTMQZrmk 5994
-VPTESTMQZrr 5995
-VPTESTMQZrrk 5996
-VPTESTMWZ 5997
-VPTESTMWZrm 5998
-VPTESTMWZrmk 5999
-VPTESTMWZrr 6000
-VPTESTMWZrrk 6001
-VPTESTNMBZ 6002
-VPTESTNMBZrm 6003
-VPTESTNMBZrmk 6004
-VPTESTNMBZrr 6005
-VPTESTNMBZrrk 6006
-VPTESTNMDZ 6007
-VPTESTNMDZrm 6008
-VPTESTNMDZrmb 6009
-VPTESTNMDZrmbk 6010
-VPTESTNMDZrmk 6011
-VPTESTNMDZrr 6012
-VPTESTNMDZrrk 6013
-VPTESTNMQZ 6014
-VPTESTNMQZrm 6015
-VPTESTNMQZrmb 6016
-VPTESTNMQZrmbk 6017
-VPTESTNMQZrmk 6018
-VPTESTNMQZrr 6019
-VPTESTNMQZrrk 6020
-VPTESTNMWZ 6021
-VPTESTNMWZrm 6022
-VPTESTNMWZrmk 6023
-VPTESTNMWZrr 6024
-VPTESTNMWZrrk 6025
-VPTESTYrm 6026
-VPTESTYrr 6027
-VPTESTrm 6028
-VPTESTrr 6029
-VPUNPCKHBWYrm 6030
-VPUNPCKHBWYrr 6031
-VPUNPCKHBWZ 6032
-VPUNPCKHBWZrm 6033
-VPUNPCKHBWZrmk 6034
-VPUNPCKHBWZrmkz 6035
-VPUNPCKHBWZrr 6036
-VPUNPCKHBWZrrk 6037
-VPUNPCKHBWZrrkz 6038
-VPUNPCKHBWrm 6039
-VPUNPCKHBWrr 6040
-VPUNPCKHDQYrm 6041
-VPUNPCKHDQYrr 6042
-VPUNPCKHDQZ 6043
-VPUNPCKHDQZrm 6044
-VPUNPCKHDQZrmb 6045
-VPUNPCKHDQZrmbk 6046
-VPUNPCKHDQZrmbkz 6047
-VPUNPCKHDQZrmk 6048
-VPUNPCKHDQZrmkz 6049
-VPUNPCKHDQZrr 6050
-VPUNPCKHDQZrrk 6051
-VPUNPCKHDQZrrkz 6052
-VPUNPCKHDQrm 6053
-VPUNPCKHDQrr 6054
-VPUNPCKHQDQYrm 6055
-VPUNPCKHQDQYrr 6056
-VPUNPCKHQDQZ 6057
-VPUNPCKHQDQZrm 6058
-VPUNPCKHQDQZrmb 6059
-VPUNPCKHQDQZrmbk 6060
-VPUNPCKHQDQZrmbkz 6061
-VPUNPCKHQDQZrmk 6062
-VPUNPCKHQDQZrmkz 6063
-VPUNPCKHQDQZrr 6064
-VPUNPCKHQDQZrrk 6065
-VPUNPCKHQDQZrrkz 6066
-VPUNPCKHQDQrm 6067
-VPUNPCKHQDQrr 6068
-VPUNPCKHWDYrm 6069
-VPUNPCKHWDYrr 6070
-VPUNPCKHWDZ 6071
-VPUNPCKHWDZrm 6072
-VPUNPCKHWDZrmk 6073
-VPUNPCKHWDZrmkz 6074
-VPUNPCKHWDZrr 6075
-VPUNPCKHWDZrrk 6076
-VPUNPCKHWDZrrkz 6077
-VPUNPCKHWDrm 6078
-VPUNPCKHWDrr 6079
-VPUNPCKLBWYrm 6080
-VPUNPCKLBWYrr 6081
-VPUNPCKLBWZ 6082
-VPUNPCKLBWZrm 6083
-VPUNPCKLBWZrmk 6084
-VPUNPCKLBWZrmkz 6085
-VPUNPCKLBWZrr 6086
-VPUNPCKLBWZrrk 6087
-VPUNPCKLBWZrrkz 6088
-VPUNPCKLBWrm 6089
-VPUNPCKLBWrr 6090
-VPUNPCKLDQYrm 6091
-VPUNPCKLDQYrr 6092
-VPUNPCKLDQZ 6093
-VPUNPCKLDQZrm 6094
-VPUNPCKLDQZrmb 6095
-VPUNPCKLDQZrmbk 6096
-VPUNPCKLDQZrmbkz 6097
-VPUNPCKLDQZrmk 6098
-VPUNPCKLDQZrmkz 6099
-VPUNPCKLDQZrr 6100
-VPUNPCKLDQZrrk 6101
-VPUNPCKLDQZrrkz 6102
-VPUNPCKLDQrm 6103
-VPUNPCKLDQrr 6104
-VPUNPCKLQDQYrm 6105
-VPUNPCKLQDQYrr 6106
-VPUNPCKLQDQZ 6107
-VPUNPCKLQDQZrm 6108
-VPUNPCKLQDQZrmb 6109
-VPUNPCKLQDQZrmbk 6110
-VPUNPCKLQDQZrmbkz 6111
-VPUNPCKLQDQZrmk 6112
-VPUNPCKLQDQZrmkz 6113
-VPUNPCKLQDQZrr 6114
-VPUNPCKLQDQZrrk 6115
-VPUNPCKLQDQZrrkz 6116
-VPUNPCKLQDQrm 6117
-VPUNPCKLQDQrr 6118
-VPUNPCKLWDYrm 6119
-VPUNPCKLWDYrr 6120
-VPUNPCKLWDZ 6121
-VPUNPCKLWDZrm 6122
-VPUNPCKLWDZrmk 6123
-VPUNPCKLWDZrmkz 6124
-VPUNPCKLWDZrr 6125
-VPUNPCKLWDZrrk 6126
-VPUNPCKLWDZrrkz 6127
-VPUNPCKLWDrm 6128
-VPUNPCKLWDrr 6129
-VPXORDZ 6130
-VPXORDZrm 6131
-VPXORDZrmb 6132
-VPXORDZrmbk 6133
-VPXORDZrmbkz 6134
-VPXORDZrmk 6135
-VPXORDZrmkz 6136
-VPXORDZrr 6137
-VPXORDZrrk 6138
-VPXORDZrrkz 6139
-VPXORQZ 6140
-VPXORQZrm 6141
-VPXORQZrmb 6142
-VPXORQZrmbk 6143
-VPXORQZrmbkz 6144
-VPXORQZrmk 6145
-VPXORQZrmkz 6146
-VPXORQZrr 6147
-VPXORQZrrk 6148
-VPXORQZrrkz 6149
-VPXORYrm 6150
-VPXORYrr 6151
-VPXORrm 6152
-VPXORrr 6153
-VRANGEPDZ 6154
-VRANGEPDZrmbi 6155
-VRANGEPDZrmbik 6156
-VRANGEPDZrmbikz 6157
-VRANGEPDZrmi 6158
-VRANGEPDZrmik 6159
-VRANGEPDZrmikz 6160
-VRANGEPDZrri 6161
-VRANGEPDZrrib 6162
-VRANGEPDZrribk 6163
-VRANGEPDZrribkz 6164
-VRANGEPDZrrik 6165
-VRANGEPDZrrikz 6166
-VRANGEPSZ 6167
-VRANGEPSZrmbi 6168
-VRANGEPSZrmbik 6169
-VRANGEPSZrmbikz 6170
-VRANGEPSZrmi 6171
-VRANGEPSZrmik 6172
-VRANGEPSZrmikz 6173
-VRANGEPSZrri 6174
-VRANGEPSZrrib 6175
-VRANGEPSZrribk 6176
-VRANGEPSZrribkz 6177
-VRANGEPSZrrik 6178
-VRANGEPSZrrikz 6179
-VRANGESDZrmi 6180
-VRANGESDZrmik 6181
-VRANGESDZrmikz 6182
-VRANGESDZrri 6183
-VRANGESDZrrib 6184
-VRANGESDZrribk 6185
-VRANGESDZrribkz 6186
-VRANGESDZrrik 6187
-VRANGESDZrrikz 6188
-VRANGESSZrmi 6189
-VRANGESSZrmik 6190
-VRANGESSZrmikz 6191
-VRANGESSZrri 6192
-VRANGESSZrrib 6193
-VRANGESSZrribk 6194
-VRANGESSZrribkz 6195
-VRANGESSZrrik 6196
-VRANGESSZrrikz 6197
-VRCP 6198
-VRCPBF 6199
-VRCPPHZ 6200
-VRCPPHZm 6201
-VRCPPHZmb 6202
-VRCPPHZmbk 6203
-VRCPPHZmbkz 6204
-VRCPPHZmk 6205
-VRCPPHZmkz 6206
-VRCPPHZr 6207
-VRCPPHZrk 6208
-VRCPPHZrkz 6209
-VRCPPSYm 6210
-VRCPPSYr 6211
-VRCPPSm 6212
-VRCPPSr 6213
-VRCPSHZrm 6214
-VRCPSHZrmk 6215
-VRCPSHZrmkz 6216
-VRCPSHZrr 6217
-VRCPSHZrrk 6218
-VRCPSHZrrkz 6219
-VRCPSSm 6220
-VRCPSSm_Int 6221
-VRCPSSr 6222
-VRCPSSr_Int 6223
-VREDUCEBF 6224
-VREDUCEPDZ 6225
-VREDUCEPDZrmbi 6226
-VREDUCEPDZrmbik 6227
-VREDUCEPDZrmbikz 6228
-VREDUCEPDZrmi 6229
-VREDUCEPDZrmik 6230
-VREDUCEPDZrmikz 6231
-VREDUCEPDZrri 6232
-VREDUCEPDZrrib 6233
-VREDUCEPDZrribk 6234
-VREDUCEPDZrribkz 6235
-VREDUCEPDZrrik 6236
-VREDUCEPDZrrikz 6237
-VREDUCEPHZ 6238
-VREDUCEPHZrmbi 6239
-VREDUCEPHZrmbik 6240
-VREDUCEPHZrmbikz 6241
-VREDUCEPHZrmi 6242
-VREDUCEPHZrmik 6243
-VREDUCEPHZrmikz 6244
-VREDUCEPHZrri 6245
-VREDUCEPHZrrib 6246
-VREDUCEPHZrribk 6247
-VREDUCEPHZrribkz 6248
-VREDUCEPHZrrik 6249
-VREDUCEPHZrrikz 6250
-VREDUCEPSZ 6251
-VREDUCEPSZrmbi 6252
-VREDUCEPSZrmbik 6253
-VREDUCEPSZrmbikz 6254
-VREDUCEPSZrmi 6255
-VREDUCEPSZrmik 6256
-VREDUCEPSZrmikz 6257
-VREDUCEPSZrri 6258
-VREDUCEPSZrrib 6259
-VREDUCEPSZrribk 6260
-VREDUCEPSZrribkz 6261
-VREDUCEPSZrrik 6262
-VREDUCEPSZrrikz 6263
-VREDUCESDZrmi 6264
-VREDUCESDZrmik 6265
-VREDUCESDZrmikz 6266
-VREDUCESDZrri 6267
-VREDUCESDZrrib 6268
-VREDUCESDZrribk 6269
-VREDUCESDZrribkz 6270
-VREDUCESDZrrik 6271
-VREDUCESDZrrikz 6272
-VREDUCESHZrmi 6273
-VREDUCESHZrmik 6274
-VREDUCESHZrmikz 6275
-VREDUCESHZrri 6276
-VREDUCESHZrrib 6277
-VREDUCESHZrribk 6278
-VREDUCESHZrribkz 6279
-VREDUCESHZrrik 6280
-VREDUCESHZrrikz 6281
-VREDUCESSZrmi 6282
-VREDUCESSZrmik 6283
-VREDUCESSZrmikz 6284
-VREDUCESSZrri 6285
-VREDUCESSZrrib 6286
-VREDUCESSZrribk 6287
-VREDUCESSZrribkz 6288
-VREDUCESSZrrik 6289
-VREDUCESSZrrikz 6290
-VRNDSCALEBF 6291
-VRNDSCALEPDZ 6292
-VRNDSCALEPDZrmbi 6293
-VRNDSCALEPDZrmbik 6294
-VRNDSCALEPDZrmbikz 6295
-VRNDSCALEPDZrmi 6296
-VRNDSCALEPDZrmik 6297
-VRNDSCALEPDZrmikz 6298
-VRNDSCALEPDZrri 6299
-VRNDSCALEPDZrrib 6300
-VRNDSCALEPDZrribk 6301
-VRNDSCALEPDZrribkz 6302
-VRNDSCALEPDZrrik 6303
-VRNDSCALEPDZrrikz 6304
-VRNDSCALEPHZ 6305
-VRNDSCALEPHZrmbi 6306
-VRNDSCALEPHZrmbik 6307
-VRNDSCALEPHZrmbikz 6308
-VRNDSCALEPHZrmi 6309
-VRNDSCALEPHZrmik 6310
-VRNDSCALEPHZrmikz 6311
-VRNDSCALEPHZrri 6312
-VRNDSCALEPHZrrib 6313
-VRNDSCALEPHZrribk 6314
-VRNDSCALEPHZrribkz 6315
-VRNDSCALEPHZrrik 6316
-VRNDSCALEPHZrrikz 6317
-VRNDSCALEPSZ 6318
-VRNDSCALEPSZrmbi 6319
-VRNDSCALEPSZrmbik 6320
-VRNDSCALEPSZrmbikz 6321
-VRNDSCALEPSZrmi 6322
-VRNDSCALEPSZrmik 6323
-VRNDSCALEPSZrmikz 6324
-VRNDSCALEPSZrri 6325
-VRNDSCALEPSZrrib 6326
-VRNDSCALEPSZrribk 6327
-VRNDSCALEPSZrribkz 6328
-VRNDSCALEPSZrrik 6329
-VRNDSCALEPSZrrikz 6330
-VRNDSCALESDZrmi 6331
-VRNDSCALESDZrmi_Int 6332
-VRNDSCALESDZrmik_Int 6333
-VRNDSCALESDZrmikz_Int 6334
-VRNDSCALESDZrri 6335
-VRNDSCALESDZrri_Int 6336
-VRNDSCALESDZrrib_Int 6337
-VRNDSCALESDZrribk_Int 6338
-VRNDSCALESDZrribkz_Int 6339
-VRNDSCALESDZrrik_Int 6340
-VRNDSCALESDZrrikz_Int 6341
-VRNDSCALESHZrmi 6342
-VRNDSCALESHZrmi_Int 6343
-VRNDSCALESHZrmik_Int 6344
-VRNDSCALESHZrmikz_Int 6345
-VRNDSCALESHZrri 6346
-VRNDSCALESHZrri_Int 6347
-VRNDSCALESHZrrib_Int 6348
-VRNDSCALESHZrribk_Int 6349
-VRNDSCALESHZrribkz_Int 6350
-VRNDSCALESHZrrik_Int 6351
-VRNDSCALESHZrrikz_Int 6352
-VRNDSCALESSZrmi 6353
-VRNDSCALESSZrmi_Int 6354
-VRNDSCALESSZrmik_Int 6355
-VRNDSCALESSZrmikz_Int 6356
-VRNDSCALESSZrri 6357
-VRNDSCALESSZrri_Int 6358
-VRNDSCALESSZrrib_Int 6359
-VRNDSCALESSZrribk_Int 6360
-VRNDSCALESSZrribkz_Int 6361
-VRNDSCALESSZrrik_Int 6362
-VRNDSCALESSZrrikz_Int 6363
-VROUNDPDYmi 6364
-VROUNDPDYri 6365
-VROUNDPDmi 6366
-VROUNDPDri 6367
-VROUNDPSYmi 6368
-VROUNDPSYri 6369
-VROUNDPSmi 6370
-VROUNDPSri 6371
-VROUNDSDmi 6372
-VROUNDSDmi_Int 6373
-VROUNDSDri 6374
-VROUNDSDri_Int 6375
-VROUNDSSmi 6376
-VROUNDSSmi_Int 6377
-VROUNDSSri 6378
-VROUNDSSri_Int 6379
-VRSQRT 6380
-VRSQRTBF 6381
-VRSQRTPHZ 6382
-VRSQRTPHZm 6383
-VRSQRTPHZmb 6384
-VRSQRTPHZmbk 6385
-VRSQRTPHZmbkz 6386
-VRSQRTPHZmk 6387
-VRSQRTPHZmkz 6388
-VRSQRTPHZr 6389
-VRSQRTPHZrk 6390
-VRSQRTPHZrkz 6391
-VRSQRTPSYm 6392
-VRSQRTPSYr 6393
-VRSQRTPSm 6394
-VRSQRTPSr 6395
-VRSQRTSHZrm 6396
-VRSQRTSHZrmk 6397
-VRSQRTSHZrmkz 6398
-VRSQRTSHZrr 6399
-VRSQRTSHZrrk 6400
-VRSQRTSHZrrkz 6401
-VRSQRTSSm 6402
-VRSQRTSSm_Int 6403
-VRSQRTSSr 6404
-VRSQRTSSr_Int 6405
-VSCALEFBF 6406
-VSCALEFPDZ 6407
-VSCALEFPDZrm 6408
-VSCALEFPDZrmb 6409
-VSCALEFPDZrmbk 6410
-VSCALEFPDZrmbkz 6411
-VSCALEFPDZrmk 6412
-VSCALEFPDZrmkz 6413
-VSCALEFPDZrr 6414
-VSCALEFPDZrrb 6415
-VSCALEFPDZrrbk 6416
-VSCALEFPDZrrbkz 6417
-VSCALEFPDZrrk 6418
-VSCALEFPDZrrkz 6419
-VSCALEFPHZ 6420
-VSCALEFPHZrm 6421
-VSCALEFPHZrmb 6422
-VSCALEFPHZrmbk 6423
-VSCALEFPHZrmbkz 6424
-VSCALEFPHZrmk 6425
-VSCALEFPHZrmkz 6426
-VSCALEFPHZrr 6427
-VSCALEFPHZrrb 6428
-VSCALEFPHZrrbk 6429
-VSCALEFPHZrrbkz 6430
-VSCALEFPHZrrk 6431
-VSCALEFPHZrrkz 6432
-VSCALEFPSZ 6433
-VSCALEFPSZrm 6434
-VSCALEFPSZrmb 6435
-VSCALEFPSZrmbk 6436
-VSCALEFPSZrmbkz 6437
-VSCALEFPSZrmk 6438
-VSCALEFPSZrmkz 6439
-VSCALEFPSZrr 6440
-VSCALEFPSZrrb 6441
-VSCALEFPSZrrbk 6442
-VSCALEFPSZrrbkz 6443
-VSCALEFPSZrrk 6444
-VSCALEFPSZrrkz 6445
-VSCALEFSDZrm 6446
-VSCALEFSDZrmk 6447
-VSCALEFSDZrmkz 6448
-VSCALEFSDZrr 6449
-VSCALEFSDZrrb_Int 6450
-VSCALEFSDZrrbk_Int 6451
-VSCALEFSDZrrbkz_Int 6452
-VSCALEFSDZrrk 6453
-VSCALEFSDZrrkz 6454
-VSCALEFSHZrm 6455
-VSCALEFSHZrmk 6456
-VSCALEFSHZrmkz 6457
-VSCALEFSHZrr 6458
-VSCALEFSHZrrb_Int 6459
-VSCALEFSHZrrbk_Int 6460
-VSCALEFSHZrrbkz_Int 6461
-VSCALEFSHZrrk 6462
-VSCALEFSHZrrkz 6463
-VSCALEFSSZrm 6464
-VSCALEFSSZrmk 6465
-VSCALEFSSZrmkz 6466
-VSCALEFSSZrr 6467
-VSCALEFSSZrrb_Int 6468
-VSCALEFSSZrrbk_Int 6469
-VSCALEFSSZrrbkz_Int 6470
-VSCALEFSSZrrk 6471
-VSCALEFSSZrrkz 6472
-VSCATTERDPDZ 6473
-VSCATTERDPDZmr 6474
-VSCATTERDPSZ 6475
-VSCATTERDPSZmr 6476
-VSCATTERPF 6477
-VSCATTERQPDZ 6478
-VSCATTERQPDZmr 6479
-VSCATTERQPSZ 6480
-VSCATTERQPSZmr 6481
-VSHA 6482
-VSHUFF 6483
-VSHUFI 6484
-VSHUFPDYrmi 6485
-VSHUFPDYrri 6486
-VSHUFPDZ 6487
-VSHUFPDZrmbi 6488
-VSHUFPDZrmbik 6489
-VSHUFPDZrmbikz 6490
-VSHUFPDZrmi 6491
-VSHUFPDZrmik 6492
-VSHUFPDZrmikz 6493
-VSHUFPDZrri 6494
-VSHUFPDZrrik 6495
-VSHUFPDZrrikz 6496
-VSHUFPDrmi 6497
-VSHUFPDrri 6498
-VSHUFPSYrmi 6499
-VSHUFPSYrri 6500
-VSHUFPSZ 6501
-VSHUFPSZrmbi 6502
-VSHUFPSZrmbik 6503
-VSHUFPSZrmbikz 6504
-VSHUFPSZrmi 6505
-VSHUFPSZrmik 6506
-VSHUFPSZrmikz 6507
-VSHUFPSZrri 6508
-VSHUFPSZrrik 6509
-VSHUFPSZrrikz 6510
-VSHUFPSrmi 6511
-VSHUFPSrri 6512
-VSM 6513
-VSQRTBF 6514
-VSQRTPDYm 6515
-VSQRTPDYr 6516
-VSQRTPDZ 6517
-VSQRTPDZm 6518
-VSQRTPDZmb 6519
-VSQRTPDZmbk 6520
-VSQRTPDZmbkz 6521
-VSQRTPDZmk 6522
-VSQRTPDZmkz 6523
-VSQRTPDZr 6524
-VSQRTPDZrb 6525
-VSQRTPDZrbk 6526
-VSQRTPDZrbkz 6527
-VSQRTPDZrk 6528
-VSQRTPDZrkz 6529
-VSQRTPDm 6530
-VSQRTPDr 6531
-VSQRTPHZ 6532
-VSQRTPHZm 6533
-VSQRTPHZmb 6534
-VSQRTPHZmbk 6535
-VSQRTPHZmbkz 6536
-VSQRTPHZmk 6537
-VSQRTPHZmkz 6538
-VSQRTPHZr 6539
-VSQRTPHZrb 6540
-VSQRTPHZrbk 6541
-VSQRTPHZrbkz 6542
-VSQRTPHZrk 6543
-VSQRTPHZrkz 6544
-VSQRTPSYm 6545
-VSQRTPSYr 6546
-VSQRTPSZ 6547
-VSQRTPSZm 6548
-VSQRTPSZmb 6549
-VSQRTPSZmbk 6550
-VSQRTPSZmbkz 6551
-VSQRTPSZmk 6552
-VSQRTPSZmkz 6553
-VSQRTPSZr 6554
-VSQRTPSZrb 6555
-VSQRTPSZrbk 6556
-VSQRTPSZrbkz 6557
-VSQRTPSZrk 6558
-VSQRTPSZrkz 6559
-VSQRTPSm 6560
-VSQRTPSr 6561
-VSQRTSDZm 6562
-VSQRTSDZm_Int 6563
-VSQRTSDZmk_Int 6564
-VSQRTSDZmkz_Int 6565
-VSQRTSDZr 6566
-VSQRTSDZr_Int 6567
-VSQRTSDZrb_Int 6568
-VSQRTSDZrbk_Int 6569
-VSQRTSDZrbkz_Int 6570
-VSQRTSDZrk_Int 6571
-VSQRTSDZrkz_Int 6572
-VSQRTSDm 6573
-VSQRTSDm_Int 6574
-VSQRTSDr 6575
-VSQRTSDr_Int 6576
-VSQRTSHZm 6577
-VSQRTSHZm_Int 6578
-VSQRTSHZmk_Int 6579
-VSQRTSHZmkz_Int 6580
-VSQRTSHZr 6581
-VSQRTSHZr_Int 6582
-VSQRTSHZrb_Int 6583
-VSQRTSHZrbk_Int 6584
-VSQRTSHZrbkz_Int 6585
-VSQRTSHZrk_Int 6586
-VSQRTSHZrkz_Int 6587
-VSQRTSSZm 6588
-VSQRTSSZm_Int 6589
-VSQRTSSZmk_Int 6590
-VSQRTSSZmkz_Int 6591
-VSQRTSSZr 6592
-VSQRTSSZr_Int 6593
-VSQRTSSZrb_Int 6594
-VSQRTSSZrbk_Int 6595
-VSQRTSSZrbkz_Int 6596
-VSQRTSSZrk_Int 6597
-VSQRTSSZrkz_Int 6598
-VSQRTSSm 6599
-VSQRTSSm_Int 6600
-VSQRTSSr 6601
-VSQRTSSr_Int 6602
-VSTMXCSR 6603
-VSUBBF 6604
-VSUBPDYrm 6605
-VSUBPDYrr 6606
-VSUBPDZ 6607
-VSUBPDZrm 6608
-VSUBPDZrmb 6609
-VSUBPDZrmbk 6610
-VSUBPDZrmbkz 6611
-VSUBPDZrmk 6612
-VSUBPDZrmkz 6613
-VSUBPDZrr 6614
-VSUBPDZrrb 6615
-VSUBPDZrrbk 6616
-VSUBPDZrrbkz 6617
-VSUBPDZrrk 6618
-VSUBPDZrrkz 6619
-VSUBPDrm 6620
-VSUBPDrr 6621
-VSUBPHZ 6622
-VSUBPHZrm 6623
-VSUBPHZrmb 6624
-VSUBPHZrmbk 6625
-VSUBPHZrmbkz 6626
-VSUBPHZrmk 6627
-VSUBPHZrmkz 6628
-VSUBPHZrr 6629
-VSUBPHZrrb 6630
-VSUBPHZrrbk 6631
-VSUBPHZrrbkz 6632
-VSUBPHZrrk 6633
-VSUBPHZrrkz 6634
-VSUBPSYrm 6635
-VSUBPSYrr 6636
-VSUBPSZ 6637
-VSUBPSZrm 6638
-VSUBPSZrmb 6639
-VSUBPSZrmbk 6640
-VSUBPSZrmbkz 6641
-VSUBPSZrmk 6642
-VSUBPSZrmkz 6643
-VSUBPSZrr 6644
-VSUBPSZrrb 6645
-VSUBPSZrrbk 6646
-VSUBPSZrrbkz 6647
-VSUBPSZrrk 6648
-VSUBPSZrrkz 6649
-VSUBPSrm 6650
-VSUBPSrr 6651
-VSUBSDZrm 6652
-VSUBSDZrm_Int 6653
-VSUBSDZrmk_Int 6654
-VSUBSDZrmkz_Int 6655
-VSUBSDZrr 6656
-VSUBSDZrr_Int 6657
-VSUBSDZrrb_Int 6658
-VSUBSDZrrbk_Int 6659
-VSUBSDZrrbkz_Int 6660
-VSUBSDZrrk_Int 6661
-VSUBSDZrrkz_Int 6662
-VSUBSDrm 6663
-VSUBSDrm_Int 6664
-VSUBSDrr 6665
-VSUBSDrr_Int 6666
-VSUBSHZrm 6667
-VSUBSHZrm_Int 6668
-VSUBSHZrmk_Int 6669
-VSUBSHZrmkz_Int 6670
-VSUBSHZrr 6671
-VSUBSHZrr_Int 6672
-VSUBSHZrrb_Int 6673
-VSUBSHZrrbk_Int 6674
-VSUBSHZrrbkz_Int 6675
-VSUBSHZrrk_Int 6676
-VSUBSHZrrkz_Int 6677
-VSUBSSZrm 6678
-VSUBSSZrm_Int 6679
-VSUBSSZrmk_Int 6680
-VSUBSSZrmkz_Int 6681
-VSUBSSZrr 6682
-VSUBSSZrr_Int 6683
-VSUBSSZrrb_Int 6684
-VSUBSSZrrbk_Int 6685
-VSUBSSZrrbkz_Int 6686
-VSUBSSZrrk_Int 6687
-VSUBSSZrrkz_Int 6688
-VSUBSSrm 6689
-VSUBSSrm_Int 6690
-VSUBSSrr 6691
-VSUBSSrr_Int 6692
-VTESTPDYrm 6693
-VTESTPDYrr 6694
-VTESTPDrm 6695
-VTESTPDrr 6696
-VTESTPSYrm 6697
-VTESTPSYrr 6698
-VTESTPSrm 6699
-VTESTPSrr 6700
-VUCOMISDZrm 6701
-VUCOMISDZrm_Int 6702
-VUCOMISDZrr 6703
-VUCOMISDZrr_Int 6704
-VUCOMISDZrrb 6705
-VUCOMISDrm 6706
-VUCOMISDrm_Int 6707
-VUCOMISDrr 6708
-VUCOMISDrr_Int 6709
-VUCOMISHZrm 6710
-VUCOMISHZrm_Int 6711
-VUCOMISHZrr 6712
-VUCOMISHZrr_Int 6713
-VUCOMISHZrrb 6714
-VUCOMISSZrm 6715
-VUCOMISSZrm_Int 6716
-VUCOMISSZrr 6717
-VUCOMISSZrr_Int 6718
-VUCOMISSZrrb 6719
-VUCOMISSrm 6720
-VUCOMISSrm_Int 6721
-VUCOMISSrr 6722
-VUCOMISSrr_Int 6723
-VUCOMXSDZrm 6724
-VUCOMXSDZrm_Int 6725
-VUCOMXSDZrr 6726
-VUCOMXSDZrr_Int 6727
-VUCOMXSDZrrb_Int 6728
-VUCOMXSHZrm 6729
-VUCOMXSHZrm_Int 6730
-VUCOMXSHZrr 6731
-VUCOMXSHZrr_Int 6732
-VUCOMXSHZrrb_Int 6733
-VUCOMXSSZrm 6734
-VUCOMXSSZrm_Int 6735
-VUCOMXSSZrr 6736
-VUCOMXSSZrr_Int 6737
-VUCOMXSSZrrb_Int 6738
-VUNPCKHPDYrm 6739
-VUNPCKHPDYrr 6740
-VUNPCKHPDZ 6741
-VUNPCKHPDZrm 6742
-VUNPCKHPDZrmb 6743
-VUNPCKHPDZrmbk 6744
-VUNPCKHPDZrmbkz 6745
-VUNPCKHPDZrmk 6746
-VUNPCKHPDZrmkz 6747
-VUNPCKHPDZrr 6748
-VUNPCKHPDZrrk 6749
-VUNPCKHPDZrrkz 6750
-VUNPCKHPDrm 6751
-VUNPCKHPDrr 6752
-VUNPCKHPSYrm 6753
-VUNPCKHPSYrr 6754
-VUNPCKHPSZ 6755
-VUNPCKHPSZrm 6756
-VUNPCKHPSZrmb 6757
-VUNPCKHPSZrmbk 6758
-VUNPCKHPSZrmbkz 6759
-VUNPCKHPSZrmk 6760
-VUNPCKHPSZrmkz 6761
-VUNPCKHPSZrr 6762
-VUNPCKHPSZrrk 6763
-VUNPCKHPSZrrkz 6764
-VUNPCKHPSrm 6765
-VUNPCKHPSrr 6766
-VUNPCKLPDYrm 6767
-VUNPCKLPDYrr 6768
-VUNPCKLPDZ 6769
-VUNPCKLPDZrm 6770
-VUNPCKLPDZrmb 6771
-VUNPCKLPDZrmbk 6772
-VUNPCKLPDZrmbkz 6773
-VUNPCKLPDZrmk 6774
-VUNPCKLPDZrmkz 6775
-VUNPCKLPDZrr 6776
-VUNPCKLPDZrrk 6777
-VUNPCKLPDZrrkz 6778
-VUNPCKLPDrm 6779
-VUNPCKLPDrr 6780
-VUNPCKLPSYrm 6781
-VUNPCKLPSYrr 6782
-VUNPCKLPSZ 6783
-VUNPCKLPSZrm 6784
-VUNPCKLPSZrmb 6785
-VUNPCKLPSZrmbk 6786
-VUNPCKLPSZrmbkz 6787
-VUNPCKLPSZrmk 6788
-VUNPCKLPSZrmkz 6789
-VUNPCKLPSZrr 6790
-VUNPCKLPSZrrk 6791
-VUNPCKLPSZrrkz 6792
-VUNPCKLPSrm 6793
-VUNPCKLPSrr 6794
-VXORPDYrm 6795
-VXORPDYrr 6796
-VXORPDZ 6797
-VXORPDZrm 6798
-VXORPDZrmb 6799
-VXORPDZrmbk 6800
-VXORPDZrmbkz 6801
-VXORPDZrmk 6802
-VXORPDZrmkz 6803
-VXORPDZrr 6804
-VXORPDZrrk 6805
-VXORPDZrrkz 6806
-VXORPDrm 6807
-VXORPDrr 6808
-VXORPSYrm 6809
-VXORPSYrr 6810
-VXORPSZ 6811
-VXORPSZrm 6812
-VXORPSZrmb 6813
-VXORPSZrmbk 6814
-VXORPSZrmbkz 6815
-VXORPSZrmk 6816
-VXORPSZrmkz 6817
-VXORPSZrr 6818
-VXORPSZrrk 6819
-VXORPSZrrkz 6820
-VXORPSrm 6821
-VXORPSrr 6822
-VZEROALL 6823
-VZEROUPPER 6824
-V_SET 6825
-V_SETALLONES 6826
-WAIT 6827
-WBINVD 6828
-WBNOINVD 6829
-WRFLAGS 6830
-WRFSBASE 6831
-WRGSBASE 6832
-WRMSR 6833
-WRMSRLIST 6834
-WRMSRNS 6835
-WRMSRNSir 6836
-WRMSRNSir_EVEX 6837
-WRPKRUr 6838
-WRSSD 6839
-WRSSD_EVEX 6840
-WRSSQ 6841
-WRSSQ_EVEX 6842
-WRUSSD 6843
-WRUSSD_EVEX 6844
-WRUSSQ 6845
-WRUSSQ_EVEX 6846
-XABORT 6847
-XABORT_DEF 6848
-XACQUIRE_PREFIX 6849
-XADD 6850
-XAM_F 6851
-XAM_Fp 6852
-XBEGIN 6853
-XCHG 6854
-XCH_F 6855
-XCRYPTCBC 6856
-XCRYPTCFB 6857
-XCRYPTCTR 6858
-XCRYPTECB 6859
-XCRYPTOFB 6860
-XEND 6861
-XGETBV 6862
-XLAT 6863
-XOR 6864
-XORPDrm 6865
-XORPDrr 6866
-XORPSrm 6867
-XORPSrr 6868
-XRELEASE_PREFIX 6869
-XRESLDTRK 6870
-XRSTOR 6871
-XRSTORS 6872
-XSAVE 6873
-XSAVEC 6874
-XSAVEOPT 6875
-XSAVES 6876
-XSETBV 6877
-XSHA 6878
-XSTORE 6879
-XSUSLDTRK 6880
-XTEST 6881
-Immediate 6882
-CImmediate 6883
-FPImmediate 6884
-MBB 6885
-FrameIndex 6886
-ConstantPoolIndex 6887
-TargetIndex 6888
-JumpTableIndex 6889
-ExternalSymbol 6890
-GlobalAddress 6891
-BlockAddress 6892
-RegisterMask 6893
-RegisterLiveOut 6894
-Metadata 6895
-MCSymbol 6896
-CFIIndex 6897
-IntrinsicID 6898
-Predicate 6899
-ShuffleMask 6900
-PhyReg_GR8 6901
-PhyReg_GRH8 6902
-PhyReg_GR8_NOREX2 6903
-PhyReg_GR8_NOREX 6904
-PhyReg_GR8_ABCD_H 6905
-PhyReg_GR8_ABCD_L 6906
-PhyReg_GRH16 6907
-PhyReg_GR16 6908
-PhyReg_GR16_NOREX2 6909
-PhyReg_GR16_NOREX 6910
-PhyReg_VK1 6911
-PhyReg_VK16 6912
-PhyReg_VK2 6913
-PhyReg_VK4 6914
-PhyReg_VK8 6915
-PhyReg_VK16WM 6916
-PhyReg_VK1WM 6917
-PhyReg_VK2WM 6918
-PhyReg_VK4WM 6919
-PhyReg_VK8WM 6920
-PhyReg_SEGMENT_REG 6921
-PhyReg_GR16_ABCD 6922
-PhyReg_FPCCR 6923
-PhyReg_FR16X 6924
-PhyReg_FR16 6925
-PhyReg_VK16PAIR 6926
-PhyReg_VK1PAIR 6927
-PhyReg_VK2PAIR 6928
-PhyReg_VK4PAIR 6929
-PhyReg_VK8PAIR 6930
-PhyReg_VK1PAIR_with_sub_mask_0_in_VK1WM 6931
-PhyReg_LOW32_ADDR_ACCESS_RBP 6932
-PhyReg_LOW32_ADDR_ACCESS 6933
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 6934
-PhyReg_FR32X 6935
-PhyReg_GR32 6936
-PhyReg_GR32_NOSP 6937
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 6938
-PhyReg_DEBUG_REG 6939
-PhyReg_FR32 6940
-PhyReg_GR32_NOREX2 6941
-PhyReg_GR32_NOREX2_NOSP 6942
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 6943
-PhyReg_GR32_NOREX 6944
-PhyReg_VK32 6945
-PhyReg_GR32_NOREX_NOSP 6946
-PhyReg_RFP32 6947
-PhyReg_VK32WM 6948
-PhyReg_GR32_ABCD 6949
-PhyReg_GR32_TC 6950
-PhyReg_GR32_ABCD_and_GR32_TC 6951
-PhyReg_GR32_AD 6952
-PhyReg_GR32_ArgRef 6953
-PhyReg_GR32_BPSP 6954
-PhyReg_GR32_BSI 6955
-PhyReg_GR32_CB 6956
-PhyReg_GR32_DC 6957
-PhyReg_GR32_DIBP 6958
-PhyReg_GR32_SIDI 6959
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 6960
-PhyReg_CCR 6961
-PhyReg_DFCCR 6962
-PhyReg_GR32_ABCD_and_GR32_BSI 6963
-PhyReg_GR32_AD_and_GR32_ArgRef 6964
-PhyReg_GR32_ArgRef_and_GR32_CB 6965
-PhyReg_GR32_BPSP_and_GR32_DIBP 6966
-PhyReg_GR32_BPSP_and_GR32_TC 6967
-PhyReg_GR32_BSI_and_GR32_SIDI 6968
-PhyReg_GR32_DIBP_and_GR32_SIDI 6969
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 6970
-PhyReg_LOW32_ADDR_ACCESS_with_sub_32bit 6971
-PhyReg_RFP64 6972
-PhyReg_GR64 6973
-PhyReg_FR64X 6974
-PhyReg_GR64_with_sub_8bit 6975
-PhyReg_GR64_NOSP 6976
-PhyReg_GR64_NOREX2 6977
-PhyReg_CONTROL_REG 6978
-PhyReg_FR64 6979
-PhyReg_GR64_with_sub_16bit_in_GR16_NOREX2 6980
-PhyReg_GR64_NOREX2_NOSP 6981
-PhyReg_GR64PLTSafe 6982
-PhyReg_GR64_TC 6983
-PhyReg_GR64_NOREX 6984
-PhyReg_GR64_TCW64 6985
-PhyReg_GR64_TC_with_sub_8bit 6986
-PhyReg_GR64_NOREX2_NOSP_and_GR64_TC 6987
-PhyReg_GR64_TCW64_with_sub_8bit 6988
-PhyReg_GR64_TC_and_GR64_TCW64 6989
-PhyReg_GR64_with_sub_16bit_in_GR16_NOREX 6990
-PhyReg_VK64 6991
-PhyReg_VR64 6992
-PhyReg_GR64PLTSafe_and_GR64_TC 6993
-PhyReg_GR64_NOREX2_NOSP_and_GR64_TCW64 6994
-PhyReg_GR64_NOREX_NOSP 6995
-PhyReg_GR64_NOREX_and_GR64_TC 6996
-PhyReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 6997
-PhyReg_VK64WM 6998
-PhyReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 6999
-PhyReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 7000
-PhyReg_GR64PLTSafe_and_GR64_TCW64 7001
-PhyReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 7002
-PhyReg_GR64_NOREX_and_GR64_TCW64 7003
-PhyReg_GR64_ABCD 7004
-PhyReg_GR64_with_sub_32bit_in_GR32_TC 7005
-PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 7006
-PhyReg_GR64_AD 7007
-PhyReg_GR64_ArgRef 7008
-PhyReg_GR64_and_LOW32_ADDR_ACCESS_RBP 7009
-PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef 7010
-PhyReg_GR64_with_sub_32bit_in_GR32_BPSP 7011
-PhyReg_GR64_with_sub_32bit_in_GR32_BSI 7012
-PhyReg_GR64_with_sub_32bit_in_GR32_CB 7013
-PhyReg_GR64_with_sub_32bit_in_GR32_DIBP 7014
-PhyReg_GR64_with_sub_32bit_in_GR32_SIDI 7015
-PhyReg_GR64_A 7016
-PhyReg_GR64_ArgRef_and_GR64_TC 7017
-PhyReg_GR64_and_LOW32_ADDR_ACCESS 7018
-PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7019
-PhyReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7020
-PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7021
-PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7022
-PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7023
-PhyReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7024
-PhyReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7025
-PhyReg_RST 7026
-PhyReg_RFP80 7027
-PhyReg_RFP80_7 7028
-PhyReg_VR128X 7029
-PhyReg_VR128 7030
-PhyReg_VR256X 7031
-PhyReg_VR256 7032
-PhyReg_VR512 7033
-PhyReg_VR512_0_15 7034
-PhyReg_TILE 7035
-PhyReg_TILEPAIR 7036
-VirtReg_GR8 7037
-VirtReg_GRH8 7038
-VirtReg_GR8_NOREX2 7039
-VirtReg_GR8_NOREX 7040
-VirtReg_GR8_ABCD_H 7041
-VirtReg_GR8_ABCD_L 7042
-VirtReg_GRH16 7043
-VirtReg_GR16 7044
-VirtReg_GR16_NOREX2 7045
-VirtReg_GR16_NOREX 7046
-VirtReg_VK1 7047
-VirtReg_VK16 7048
-VirtReg_VK2 7049
-VirtReg_VK4 7050
-VirtReg_VK8 7051
-VirtReg_VK16WM 7052
-VirtReg_VK1WM 7053
-VirtReg_VK2WM 7054
-VirtReg_VK4WM 7055
-VirtReg_VK8WM 7056
-VirtReg_SEGMENT_REG 7057
-VirtReg_GR16_ABCD 7058
-VirtReg_FPCCR 7059
-VirtReg_FR16X 7060
-VirtReg_FR16 7061
-VirtReg_VK16PAIR 7062
-VirtReg_VK1PAIR 7063
-VirtReg_VK2PAIR 7064
-VirtReg_VK4PAIR 7065
-VirtReg_VK8PAIR 7066
-VirtReg_VK1PAIR_with_sub_mask_0_in_VK1WM 7067
-VirtReg_LOW32_ADDR_ACCESS_RBP 7068
-VirtReg_LOW32_ADDR_ACCESS 7069
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 7070
-VirtReg_FR32X 7071
-VirtReg_GR32 7072
-VirtReg_GR32_NOSP 7073
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 7074
-VirtReg_DEBUG_REG 7075
-VirtReg_FR32 7076
-VirtReg_GR32_NOREX2 7077
-VirtReg_GR32_NOREX2_NOSP 7078
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 7079
-VirtReg_GR32_NOREX 7080
-VirtReg_VK32 7081
-VirtReg_GR32_NOREX_NOSP 7082
-VirtReg_RFP32 7083
-VirtReg_VK32WM 7084
-VirtReg_GR32_ABCD 7085
-VirtReg_GR32_TC 7086
-VirtReg_GR32_ABCD_and_GR32_TC 7087
-VirtReg_GR32_AD 7088
-VirtReg_GR32_ArgRef 7089
-VirtReg_GR32_BPSP 7090
-VirtReg_GR32_BSI 7091
-VirtReg_GR32_CB 7092
-VirtReg_GR32_DC 7093
-VirtReg_GR32_DIBP 7094
-VirtReg_GR32_SIDI 7095
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 7096
-VirtReg_CCR 7097
-VirtReg_DFCCR 7098
-VirtReg_GR32_ABCD_and_GR32_BSI 7099
-VirtReg_GR32_AD_and_GR32_ArgRef 7100
-VirtReg_GR32_ArgRef_and_GR32_CB 7101
-VirtReg_GR32_BPSP_and_GR32_DIBP 7102
-VirtReg_GR32_BPSP_and_GR32_TC 7103
-VirtReg_GR32_BSI_and_GR32_SIDI 7104
-VirtReg_GR32_DIBP_and_GR32_SIDI 7105
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 7106
-VirtReg_LOW32_ADDR_ACCESS_with_sub_32bit 7107
-VirtReg_RFP64 7108
-VirtReg_GR64 7109
-VirtReg_FR64X 7110
-VirtReg_GR64_with_sub_8bit 7111
-VirtReg_GR64_NOSP 7112
-VirtReg_GR64_NOREX2 7113
-VirtReg_CONTROL_REG 7114
-VirtReg_FR64 7115
-VirtReg_GR64_with_sub_16bit_in_GR16_NOREX2 7116
-VirtReg_GR64_NOREX2_NOSP 7117
-VirtReg_GR64PLTSafe 7118
-VirtReg_GR64_TC 7119
-VirtReg_GR64_NOREX 7120
-VirtReg_GR64_TCW64 7121
-VirtReg_GR64_TC_with_sub_8bit 7122
-VirtReg_GR64_NOREX2_NOSP_and_GR64_TC 7123
-VirtReg_GR64_TCW64_with_sub_8bit 7124
-VirtReg_GR64_TC_and_GR64_TCW64 7125
-VirtReg_GR64_with_sub_16bit_in_GR16_NOREX 7126
-VirtReg_VK64 7127
-VirtReg_VR64 7128
-VirtReg_GR64PLTSafe_and_GR64_TC 7129
-VirtReg_GR64_NOREX2_NOSP_and_GR64_TCW64 7130
-VirtReg_GR64_NOREX_NOSP 7131
-VirtReg_GR64_NOREX_and_GR64_TC 7132
-VirtReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 7133
-VirtReg_VK64WM 7134
-VirtReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 7135
-VirtReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 7136
-VirtReg_GR64PLTSafe_and_GR64_TCW64 7137
-VirtReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 7138
-VirtReg_GR64_NOREX_and_GR64_TCW64 7139
-VirtReg_GR64_ABCD 7140
-VirtReg_GR64_with_sub_32bit_in_GR32_TC 7141
-VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 7142
-VirtReg_GR64_AD 7143
-VirtReg_GR64_ArgRef 7144
-VirtReg_GR64_and_LOW32_ADDR_ACCESS_RBP 7145
-VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef 7146
-VirtReg_GR64_with_sub_32bit_in_GR32_BPSP 7147
-VirtReg_GR64_with_sub_32bit_in_GR32_BSI 7148
-VirtReg_GR64_with_sub_32bit_in_GR32_CB 7149
-VirtReg_GR64_with_sub_32bit_in_GR32_DIBP 7150
-VirtReg_GR64_with_sub_32bit_in_GR32_SIDI 7151
-VirtReg_GR64_A 7152
-VirtReg_GR64_ArgRef_and_GR64_TC 7153
-VirtReg_GR64_and_LOW32_ADDR_ACCESS 7154
-VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7155
-VirtReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7156
-VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7157
-VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7158
-VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7159
-VirtReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7160
-VirtReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7161
-VirtReg_RST 7162
-VirtReg_RFP80 7163
-VirtReg_RFP80_7 7164
-VirtReg_VR128X 7165
-VirtReg_VR128 7166
-VirtReg_VR256X 7167
-VirtReg_VR256 7168
-VirtReg_VR512 7169
-VirtReg_VR512_0_15 7170
-VirtReg_TILE 7171
-VirtReg_TILEPAIR 7172
+PTCMMIMFP 1441
+PTCMMRLFP 1442
+PTCVTROWD 1443
+PTCVTROWPS 1444
+PTDPBF 1445
+PTDPBHF 1446
+PTDPBSSD 1447
+PTDPBSSDV 1448
+PTDPBSUD 1449
+PTDPBSUDV 1450
+PTDPBUSD 1451
+PTDPBUSDV 1452
+PTDPBUUD 1453
+PTDPBUUDV 1454
+PTDPFP 1455
+PTDPHBF 1456
+PTDPHF 1457
+PTESTrm 1458
+PTESTrr 1459
+PTILELOADD 1460
+PTILELOADDRS 1461
+PTILELOADDRST 1462
+PTILELOADDRSV 1463
+PTILELOADDT 1464
+PTILELOADDV 1465
+PTILEMOVROWrre 1466
+PTILEMOVROWrreV 1467
+PTILEMOVROWrri 1468
+PTILEMOVROWrriV 1469
+PTILESTORED 1470
+PTILESTOREDV 1471
+PTILEZERO 1472
+PTILEZEROV 1473
+PTMMULTF 1474
+PTWRITE 1475
+PTWRITEm 1476
+PTWRITEr 1477
+PUNPCKHBWrm 1478
+PUNPCKHBWrr 1479
+PUNPCKHDQrm 1480
+PUNPCKHDQrr 1481
+PUNPCKHQDQrm 1482
+PUNPCKHQDQrr 1483
+PUNPCKHWDrm 1484
+PUNPCKHWDrr 1485
+PUNPCKLBWrm 1486
+PUNPCKLBWrr 1487
+PUNPCKLDQrm 1488
+PUNPCKLDQrr 1489
+PUNPCKLQDQrm 1490
+PUNPCKLQDQrr 1491
+PUNPCKLWDrm 1492
+PUNPCKLWDrr 1493
+PUSH 1494
+PUSHA 1495
+PUSHCS 1496
+PUSHDS 1497
+PUSHES 1498
+PUSHF 1499
+PUSHFS 1500
+PUSHGS 1501
+PUSHP 1502
+PUSHSS 1503
+PVALIDATE 1504
+PXORrm 1505
+PXORrr 1506
+RCL 1507
+RCPPSm 1508
+RCPPSr 1509
+RCPSSm 1510
+RCPSSm_Int 1511
+RCPSSr 1512
+RCPSSr_Int 1513
+RCR 1514
+RDFLAGS 1515
+RDFSBASE 1516
+RDGSBASE 1517
+RDMSR 1518
+RDMSRLIST 1519
+RDMSRri 1520
+RDMSRri_EVEX 1521
+RDPID 1522
+RDPKRUr 1523
+RDPMC 1524
+RDPRU 1525
+RDRAND 1526
+RDSEED 1527
+RDSSPD 1528
+RDSSPQ 1529
+RDTSC 1530
+RDTSCP 1531
+REG_SEQUENCE 1532
+REPNE_PREFIX 1533
+REP_MOVSB 1534
+REP_MOVSD 1535
+REP_MOVSQ 1536
+REP_MOVSW 1537
+REP_PREFIX 1538
+REP_STOSB 1539
+REP_STOSD 1540
+REP_STOSQ 1541
+REP_STOSW 1542
+RET 1543
+RETI 1544
+REX 1545
+RMPADJUST 1546
+RMPQUERY 1547
+RMPUPDATE 1548
+ROL 1549
+ROR 1550
+RORX 1551
+ROUNDPDmi 1552
+ROUNDPDri 1553
+ROUNDPSmi 1554
+ROUNDPSri 1555
+ROUNDSDmi 1556
+ROUNDSDmi_Int 1557
+ROUNDSDri 1558
+ROUNDSDri_Int 1559
+ROUNDSSmi 1560
+ROUNDSSmi_Int 1561
+ROUNDSSri 1562
+ROUNDSSri_Int 1563
+RSM 1564
+RSQRTPSm 1565
+RSQRTPSr 1566
+RSQRTSSm 1567
+RSQRTSSm_Int 1568
+RSQRTSSr 1569
+RSQRTSSr_Int 1570
+RSTORSSP 1571
+SAHF 1572
+SALC 1573
+SAR 1574
+SARX 1575
+SAVEPREVSSP 1576
+SBB 1577
+SCASB 1578
+SCASL 1579
+SCASQ 1580
+SCASW 1581
+SEAMCALL 1582
+SEAMOPS 1583
+SEAMRET 1584
+SEG_ALLOCA 1585
+SEH_BeginEpilogue 1586
+SEH_EndEpilogue 1587
+SEH_EndPrologue 1588
+SEH_PushFrame 1589
+SEH_PushReg 1590
+SEH_SaveReg 1591
+SEH_SaveXMM 1592
+SEH_SetFrame 1593
+SEH_StackAlign 1594
+SEH_StackAlloc 1595
+SEH_UnwindV 1596
+SEH_UnwindVersion 1597
+SENDUIPI 1598
+SERIALIZE 1599
+SETB_C 1600
+SETCCm 1601
+SETCCm_EVEX 1602
+SETCCr 1603
+SETCCr_EVEX 1604
+SETSSBSY 1605
+SETZUCCm 1606
+SETZUCCr 1607
+SFENCE 1608
+SGDT 1609
+SHA 1610
+SHL 1611
+SHLD 1612
+SHLDROT 1613
+SHLX 1614
+SHR 1615
+SHRD 1616
+SHRDROT 1617
+SHRX 1618
+SHUFPDrmi 1619
+SHUFPDrri 1620
+SHUFPSrmi 1621
+SHUFPSrri 1622
+SIDT 1623
+SKINIT 1624
+SLDT 1625
+SLWPCB 1626
+SMSW 1627
+SQRTPDm 1628
+SQRTPDr 1629
+SQRTPSm 1630
+SQRTPSr 1631
+SQRTSDm 1632
+SQRTSDm_Int 1633
+SQRTSDr 1634
+SQRTSDr_Int 1635
+SQRTSSm 1636
+SQRTSSm_Int 1637
+SQRTSSr 1638
+SQRTSSr_Int 1639
+SQRT_F 1640
+SQRT_Fp 1641
+SS_PREFIX 1642
+STAC 1643
+STACKALLOC_W_PROBING 1644
+STACKMAP 1645
+STATEPOINT 1646
+STC 1647
+STD 1648
+STGI 1649
+STI 1650
+STMXCSR 1651
+STOSB 1652
+STOSL 1653
+STOSQ 1654
+STOSW 1655
+STR 1656
+STRm 1657
+STTILECFG 1658
+STTILECFG_EVEX 1659
+STUI 1660
+ST_F 1661
+ST_FP 1662
+ST_FPrr 1663
+ST_Fp 1664
+ST_FpP 1665
+ST_Frr 1666
+SUB 1667
+SUBPDrm 1668
+SUBPDrr 1669
+SUBPSrm 1670
+SUBPSrr 1671
+SUBREG_TO_REG 1672
+SUBR_F 1673
+SUBR_FI 1674
+SUBR_FPrST 1675
+SUBR_FST 1676
+SUBR_Fp 1677
+SUBR_FpI 1678
+SUBR_FrST 1679
+SUBSDrm 1680
+SUBSDrm_Int 1681
+SUBSDrr 1682
+SUBSDrr_Int 1683
+SUBSSrm 1684
+SUBSSrm_Int 1685
+SUBSSrr 1686
+SUBSSrr_Int 1687
+SUB_F 1688
+SUB_FI 1689
+SUB_FPrST 1690
+SUB_FST 1691
+SUB_Fp 1692
+SUB_FpI 1693
+SUB_FrST 1694
+SWAPGS 1695
+SYSCALL 1696
+SYSENTER 1697
+SYSEXIT 1698
+SYSRET 1699
+T 1700
+TAILJMPd 1701
+TAILJMPd_CC 1702
+TAILJMPm 1703
+TAILJMPr 1704
+TCMMIMFP 1705
+TCMMRLFP 1706
+TCRETURN_HIPE 1707
+TCRETURN_WIN 1708
+TCRETURN_WINmi 1709
+TCRETURNdi 1710
+TCRETURNdicc 1711
+TCRETURNmi 1712
+TCRETURNri 1713
+TCVTROWD 1714
+TCVTROWPS 1715
+TDCALL 1716
+TDPBF 1717
+TDPBHF 1718
+TDPBSSD 1719
+TDPBSUD 1720
+TDPBUSD 1721
+TDPBUUD 1722
+TDPFP 1723
+TDPHBF 1724
+TDPHF 1725
+TEST 1726
+TESTUI 1727
+TILELOADD 1728
+TILELOADDRS 1729
+TILELOADDRST 1730
+TILELOADDRS_EVEX 1731
+TILELOADDT 1732
+TILELOADD_EVEX 1733
+TILEMOVROWrre 1734
+TILEMOVROWrri 1735
+TILERELEASE 1736
+TILESTORED 1737
+TILESTORED_EVEX 1738
+TILEZERO 1739
+TLBSYNC 1740
+TLSCall 1741
+TLS_addr 1742
+TLS_addrX 1743
+TLS_base_addr 1744
+TLS_base_addrX 1745
+TLS_desc 1746
+TMMULTF 1747
+TPAUSE 1748
+TRAP 1749
+TST_F 1750
+TST_Fp 1751
+TZCNT 1752
+TZMSK 1753
+UBSAN_UD 1754
+UCOMISDrm 1755
+UCOMISDrm_Int 1756
+UCOMISDrr 1757
+UCOMISDrr_Int 1758
+UCOMISSrm 1759
+UCOMISSrm_Int 1760
+UCOMISSrr 1761
+UCOMISSrr_Int 1762
+UCOM_FIPr 1763
+UCOM_FIr 1764
+UCOM_FPPr 1765
+UCOM_FPr 1766
+UCOM_FpIr 1767
+UCOM_Fpr 1768
+UCOM_Fr 1769
+UD 1770
+UIRET 1771
+UMONITOR 1772
+UMWAIT 1773
+UNPCKHPDrm 1774
+UNPCKHPDrr 1775
+UNPCKHPSrm 1776
+UNPCKHPSrr 1777
+UNPCKLPDrm 1778
+UNPCKLPDrr 1779
+UNPCKLPSrm 1780
+UNPCKLPSrr 1781
+URDMSRri 1782
+URDMSRri_EVEX 1783
+URDMSRrr 1784
+URDMSRrr_EVEX 1785
+UWRMSRir 1786
+UWRMSRir_EVEX 1787
+UWRMSRrr 1788
+UWRMSRrr_EVEX 1789
+V 1790
+VAARG 1791
+VAARG_X 1792
+VADDBF 1793
+VADDPDYrm 1794
+VADDPDYrr 1795
+VADDPDZ 1796
+VADDPDZrm 1797
+VADDPDZrmb 1798
+VADDPDZrmbk 1799
+VADDPDZrmbkz 1800
+VADDPDZrmk 1801
+VADDPDZrmkz 1802
+VADDPDZrr 1803
+VADDPDZrrb 1804
+VADDPDZrrbk 1805
+VADDPDZrrbkz 1806
+VADDPDZrrk 1807
+VADDPDZrrkz 1808
+VADDPDrm 1809
+VADDPDrr 1810
+VADDPHZ 1811
+VADDPHZrm 1812
+VADDPHZrmb 1813
+VADDPHZrmbk 1814
+VADDPHZrmbkz 1815
+VADDPHZrmk 1816
+VADDPHZrmkz 1817
+VADDPHZrr 1818
+VADDPHZrrb 1819
+VADDPHZrrbk 1820
+VADDPHZrrbkz 1821
+VADDPHZrrk 1822
+VADDPHZrrkz 1823
+VADDPSYrm 1824
+VADDPSYrr 1825
+VADDPSZ 1826
+VADDPSZrm 1827
+VADDPSZrmb 1828
+VADDPSZrmbk 1829
+VADDPSZrmbkz 1830
+VADDPSZrmk 1831
+VADDPSZrmkz 1832
+VADDPSZrr 1833
+VADDPSZrrb 1834
+VADDPSZrrbk 1835
+VADDPSZrrbkz 1836
+VADDPSZrrk 1837
+VADDPSZrrkz 1838
+VADDPSrm 1839
+VADDPSrr 1840
+VADDSDZrm 1841
+VADDSDZrm_Int 1842
+VADDSDZrmk_Int 1843
+VADDSDZrmkz_Int 1844
+VADDSDZrr 1845
+VADDSDZrr_Int 1846
+VADDSDZrrb_Int 1847
+VADDSDZrrbk_Int 1848
+VADDSDZrrbkz_Int 1849
+VADDSDZrrk_Int 1850
+VADDSDZrrkz_Int 1851
+VADDSDrm 1852
+VADDSDrm_Int 1853
+VADDSDrr 1854
+VADDSDrr_Int 1855
+VADDSHZrm 1856
+VADDSHZrm_Int 1857
+VADDSHZrmk_Int 1858
+VADDSHZrmkz_Int 1859
+VADDSHZrr 1860
+VADDSHZrr_Int 1861
+VADDSHZrrb_Int 1862
+VADDSHZrrbk_Int 1863
+VADDSHZrrbkz_Int 1864
+VADDSHZrrk_Int 1865
+VADDSHZrrkz_Int 1866
+VADDSSZrm 1867
+VADDSSZrm_Int 1868
+VADDSSZrmk_Int 1869
+VADDSSZrmkz_Int 1870
+VADDSSZrr 1871
+VADDSSZrr_Int 1872
+VADDSSZrrb_Int 1873
+VADDSSZrrbk_Int 1874
+VADDSSZrrbkz_Int 1875
+VADDSSZrrk_Int 1876
+VADDSSZrrkz_Int 1877
+VADDSSrm 1878
+VADDSSrm_Int 1879
+VADDSSrr 1880
+VADDSSrr_Int 1881
+VADDSUBPDYrm 1882
+VADDSUBPDYrr 1883
+VADDSUBPDrm 1884
+VADDSUBPDrr 1885
+VADDSUBPSYrm 1886
+VADDSUBPSYrr 1887
+VADDSUBPSrm 1888
+VADDSUBPSrr 1889
+VAESDECLASTYrm 1890
+VAESDECLASTYrr 1891
+VAESDECLASTZ 1892
+VAESDECLASTZrm 1893
+VAESDECLASTZrr 1894
+VAESDECLASTrm 1895
+VAESDECLASTrr 1896
+VAESDECYrm 1897
+VAESDECYrr 1898
+VAESDECZ 1899
+VAESDECZrm 1900
+VAESDECZrr 1901
+VAESDECrm 1902
+VAESDECrr 1903
+VAESENCLASTYrm 1904
+VAESENCLASTYrr 1905
+VAESENCLASTZ 1906
+VAESENCLASTZrm 1907
+VAESENCLASTZrr 1908
+VAESENCLASTrm 1909
+VAESENCLASTrr 1910
+VAESENCYrm 1911
+VAESENCYrr 1912
+VAESENCZ 1913
+VAESENCZrm 1914
+VAESENCZrr 1915
+VAESENCrm 1916
+VAESENCrr 1917
+VAESIMCrm 1918
+VAESIMCrr 1919
+VAESKEYGENASSISTrmi 1920
+VAESKEYGENASSISTrri 1921
+VALIGNDZ 1922
+VALIGNDZrmbi 1923
+VALIGNDZrmbik 1924
+VALIGNDZrmbikz 1925
+VALIGNDZrmi 1926
+VALIGNDZrmik 1927
+VALIGNDZrmikz 1928
+VALIGNDZrri 1929
+VALIGNDZrrik 1930
+VALIGNDZrrikz 1931
+VALIGNQZ 1932
+VALIGNQZrmbi 1933
+VALIGNQZrmbik 1934
+VALIGNQZrmbikz 1935
+VALIGNQZrmi 1936
+VALIGNQZrmik 1937
+VALIGNQZrmikz 1938
+VALIGNQZrri 1939
+VALIGNQZrrik 1940
+VALIGNQZrrikz 1941
+VANDNPDYrm 1942
+VANDNPDYrr 1943
+VANDNPDZ 1944
+VANDNPDZrm 1945
+VANDNPDZrmb 1946
+VANDNPDZrmbk 1947
+VANDNPDZrmbkz 1948
+VANDNPDZrmk 1949
+VANDNPDZrmkz 1950
+VANDNPDZrr 1951
+VANDNPDZrrk 1952
+VANDNPDZrrkz 1953
+VANDNPDrm 1954
+VANDNPDrr 1955
+VANDNPSYrm 1956
+VANDNPSYrr 1957
+VANDNPSZ 1958
+VANDNPSZrm 1959
+VANDNPSZrmb 1960
+VANDNPSZrmbk 1961
+VANDNPSZrmbkz 1962
+VANDNPSZrmk 1963
+VANDNPSZrmkz 1964
+VANDNPSZrr 1965
+VANDNPSZrrk 1966
+VANDNPSZrrkz 1967
+VANDNPSrm 1968
+VANDNPSrr 1969
+VANDPDYrm 1970
+VANDPDYrr 1971
+VANDPDZ 1972
+VANDPDZrm 1973
+VANDPDZrmb 1974
+VANDPDZrmbk 1975
+VANDPDZrmbkz 1976
+VANDPDZrmk 1977
+VANDPDZrmkz 1978
+VANDPDZrr 1979
+VANDPDZrrk 1980
+VANDPDZrrkz 1981
+VANDPDrm 1982
+VANDPDrr 1983
+VANDPSYrm 1984
+VANDPSYrr 1985
+VANDPSZ 1986
+VANDPSZrm 1987
+VANDPSZrmb 1988
+VANDPSZrmbk 1989
+VANDPSZrmbkz 1990
+VANDPSZrmk 1991
+VANDPSZrmkz 1992
+VANDPSZrr 1993
+VANDPSZrrk 1994
+VANDPSZrrkz 1995
+VANDPSrm 1996
+VANDPSrr 1997
+VASTART_SAVE_XMM_REGS 1998
+VBCSTNEBF 1999
+VBCSTNESH 2000
+VBLENDMPDZ 2001
+VBLENDMPDZrm 2002
+VBLENDMPDZrmb 2003
+VBLENDMPDZrmbk 2004
+VBLENDMPDZrmbkz 2005
+VBLENDMPDZrmk 2006
+VBLENDMPDZrmkz 2007
+VBLENDMPDZrr 2008
+VBLENDMPDZrrk 2009
+VBLENDMPDZrrkz 2010
+VBLENDMPSZ 2011
+VBLENDMPSZrm 2012
+VBLENDMPSZrmb 2013
+VBLENDMPSZrmbk 2014
+VBLENDMPSZrmbkz 2015
+VBLENDMPSZrmk 2016
+VBLENDMPSZrmkz 2017
+VBLENDMPSZrr 2018
+VBLENDMPSZrrk 2019
+VBLENDMPSZrrkz 2020
+VBLENDPDYrmi 2021
+VBLENDPDYrri 2022
+VBLENDPDrmi 2023
+VBLENDPDrri 2024
+VBLENDPSYrmi 2025
+VBLENDPSYrri 2026
+VBLENDPSrmi 2027
+VBLENDPSrri 2028
+VBLENDVPDYrmr 2029
+VBLENDVPDYrrr 2030
+VBLENDVPDrmr 2031
+VBLENDVPDrrr 2032
+VBLENDVPSYrmr 2033
+VBLENDVPSYrrr 2034
+VBLENDVPSrmr 2035
+VBLENDVPSrrr 2036
+VBROADCASTF 2037
+VBROADCASTI 2038
+VBROADCASTSDYrm 2039
+VBROADCASTSDYrr 2040
+VBROADCASTSDZ 2041
+VBROADCASTSDZrm 2042
+VBROADCASTSDZrmk 2043
+VBROADCASTSDZrmkz 2044
+VBROADCASTSDZrr 2045
+VBROADCASTSDZrrk 2046
+VBROADCASTSDZrrkz 2047
+VBROADCASTSSYrm 2048
+VBROADCASTSSYrr 2049
+VBROADCASTSSZ 2050
+VBROADCASTSSZrm 2051
+VBROADCASTSSZrmk 2052
+VBROADCASTSSZrmkz 2053
+VBROADCASTSSZrr 2054
+VBROADCASTSSZrrk 2055
+VBROADCASTSSZrrkz 2056
+VBROADCASTSSrm 2057
+VBROADCASTSSrr 2058
+VCMPBF 2059
+VCMPPDYrmi 2060
+VCMPPDYrri 2061
+VCMPPDZ 2062
+VCMPPDZrmbi 2063
+VCMPPDZrmbik 2064
+VCMPPDZrmi 2065
+VCMPPDZrmik 2066
+VCMPPDZrri 2067
+VCMPPDZrrib 2068
+VCMPPDZrribk 2069
+VCMPPDZrrik 2070
+VCMPPDrmi 2071
+VCMPPDrri 2072
+VCMPPHZ 2073
+VCMPPHZrmbi 2074
+VCMPPHZrmbik 2075
+VCMPPHZrmi 2076
+VCMPPHZrmik 2077
+VCMPPHZrri 2078
+VCMPPHZrrib 2079
+VCMPPHZrribk 2080
+VCMPPHZrrik 2081
+VCMPPSYrmi 2082
+VCMPPSYrri 2083
+VCMPPSZ 2084
+VCMPPSZrmbi 2085
+VCMPPSZrmbik 2086
+VCMPPSZrmi 2087
+VCMPPSZrmik 2088
+VCMPPSZrri 2089
+VCMPPSZrrib 2090
+VCMPPSZrribk 2091
+VCMPPSZrrik 2092
+VCMPPSrmi 2093
+VCMPPSrri 2094
+VCMPSDZrmi 2095
+VCMPSDZrmi_Int 2096
+VCMPSDZrmik_Int 2097
+VCMPSDZrri 2098
+VCMPSDZrri_Int 2099
+VCMPSDZrrib_Int 2100
+VCMPSDZrribk_Int 2101
+VCMPSDZrrik_Int 2102
+VCMPSDrmi 2103
+VCMPSDrmi_Int 2104
+VCMPSDrri 2105
+VCMPSDrri_Int 2106
+VCMPSHZrmi 2107
+VCMPSHZrmi_Int 2108
+VCMPSHZrmik_Int 2109
+VCMPSHZrri 2110
+VCMPSHZrri_Int 2111
+VCMPSHZrrib_Int 2112
+VCMPSHZrribk_Int 2113
+VCMPSHZrrik_Int 2114
+VCMPSSZrmi 2115
+VCMPSSZrmi_Int 2116
+VCMPSSZrmik_Int 2117
+VCMPSSZrri 2118
+VCMPSSZrri_Int 2119
+VCMPSSZrrib_Int 2120
+VCMPSSZrribk_Int 2121
+VCMPSSZrrik_Int 2122
+VCMPSSrmi 2123
+VCMPSSrmi_Int 2124
+VCMPSSrri 2125
+VCMPSSrri_Int 2126
+VCOMISBF 2127
+VCOMISDZrm 2128
+VCOMISDZrm_Int 2129
+VCOMISDZrr 2130
+VCOMISDZrr_Int 2131
+VCOMISDZrrb 2132
+VCOMISDrm 2133
+VCOMISDrm_Int 2134
+VCOMISDrr 2135
+VCOMISDrr_Int 2136
+VCOMISHZrm 2137
+VCOMISHZrm_Int 2138
+VCOMISHZrr 2139
+VCOMISHZrr_Int 2140
+VCOMISHZrrb 2141
+VCOMISSZrm 2142
+VCOMISSZrm_Int 2143
+VCOMISSZrr 2144
+VCOMISSZrr_Int 2145
+VCOMISSZrrb 2146
+VCOMISSrm 2147
+VCOMISSrm_Int 2148
+VCOMISSrr 2149
+VCOMISSrr_Int 2150
+VCOMPRESSPDZ 2151
+VCOMPRESSPDZmr 2152
+VCOMPRESSPDZmrk 2153
+VCOMPRESSPDZrr 2154
+VCOMPRESSPDZrrk 2155
+VCOMPRESSPDZrrkz 2156
+VCOMPRESSPSZ 2157
+VCOMPRESSPSZmr 2158
+VCOMPRESSPSZmrk 2159
+VCOMPRESSPSZrr 2160
+VCOMPRESSPSZrrk 2161
+VCOMPRESSPSZrrkz 2162
+VCOMXSDZrm_Int 2163
+VCOMXSDZrr_Int 2164
+VCOMXSDZrrb_Int 2165
+VCOMXSHZrm_Int 2166
+VCOMXSHZrr_Int 2167
+VCOMXSHZrrb_Int 2168
+VCOMXSSZrm_Int 2169
+VCOMXSSZrr_Int 2170
+VCOMXSSZrrb_Int 2171
+VCVT 2172
+VCVTBF 2173
+VCVTBIASPH 2174
+VCVTDQ 2175
+VCVTHF 2176
+VCVTNE 2177
+VCVTNEEBF 2178
+VCVTNEEPH 2179
+VCVTNEOBF 2180
+VCVTNEOPH 2181
+VCVTNEPS 2182
+VCVTPD 2183
+VCVTPH 2184
+VCVTPS 2185
+VCVTQQ 2186
+VCVTSD 2187
+VCVTSH 2188
+VCVTSI 2189
+VCVTSS 2190
+VCVTTBF 2191
+VCVTTPD 2192
+VCVTTPH 2193
+VCVTTPS 2194
+VCVTTSD 2195
+VCVTTSH 2196
+VCVTTSS 2197
+VCVTUDQ 2198
+VCVTUQQ 2199
+VCVTUSI 2200
+VCVTUW 2201
+VCVTW 2202
+VDBPSADBWZ 2203
+VDBPSADBWZrmi 2204
+VDBPSADBWZrmik 2205
+VDBPSADBWZrmikz 2206
+VDBPSADBWZrri 2207
+VDBPSADBWZrrik 2208
+VDBPSADBWZrrikz 2209
+VDIVBF 2210
+VDIVPDYrm 2211
+VDIVPDYrr 2212
+VDIVPDZ 2213
+VDIVPDZrm 2214
+VDIVPDZrmb 2215
+VDIVPDZrmbk 2216
+VDIVPDZrmbkz 2217
+VDIVPDZrmk 2218
+VDIVPDZrmkz 2219
+VDIVPDZrr 2220
+VDIVPDZrrb 2221
+VDIVPDZrrbk 2222
+VDIVPDZrrbkz 2223
+VDIVPDZrrk 2224
+VDIVPDZrrkz 2225
+VDIVPDrm 2226
+VDIVPDrr 2227
+VDIVPHZ 2228
+VDIVPHZrm 2229
+VDIVPHZrmb 2230
+VDIVPHZrmbk 2231
+VDIVPHZrmbkz 2232
+VDIVPHZrmk 2233
+VDIVPHZrmkz 2234
+VDIVPHZrr 2235
+VDIVPHZrrb 2236
+VDIVPHZrrbk 2237
+VDIVPHZrrbkz 2238
+VDIVPHZrrk 2239
+VDIVPHZrrkz 2240
+VDIVPSYrm 2241
+VDIVPSYrr 2242
+VDIVPSZ 2243
+VDIVPSZrm 2244
+VDIVPSZrmb 2245
+VDIVPSZrmbk 2246
+VDIVPSZrmbkz 2247
+VDIVPSZrmk 2248
+VDIVPSZrmkz 2249
+VDIVPSZrr 2250
+VDIVPSZrrb 2251
+VDIVPSZrrbk 2252
+VDIVPSZrrbkz 2253
+VDIVPSZrrk 2254
+VDIVPSZrrkz 2255
+VDIVPSrm 2256
+VDIVPSrr 2257
+VDIVSDZrm 2258
+VDIVSDZrm_Int 2259
+VDIVSDZrmk_Int 2260
+VDIVSDZrmkz_Int 2261
+VDIVSDZrr 2262
+VDIVSDZrr_Int 2263
+VDIVSDZrrb_Int 2264
+VDIVSDZrrbk_Int 2265
+VDIVSDZrrbkz_Int 2266
+VDIVSDZrrk_Int 2267
+VDIVSDZrrkz_Int 2268
+VDIVSDrm 2269
+VDIVSDrm_Int 2270
+VDIVSDrr 2271
+VDIVSDrr_Int 2272
+VDIVSHZrm 2273
+VDIVSHZrm_Int 2274
+VDIVSHZrmk_Int 2275
+VDIVSHZrmkz_Int 2276
+VDIVSHZrr 2277
+VDIVSHZrr_Int 2278
+VDIVSHZrrb_Int 2279
+VDIVSHZrrbk_Int 2280
+VDIVSHZrrbkz_Int 2281
+VDIVSHZrrk_Int 2282
+VDIVSHZrrkz_Int 2283
+VDIVSSZrm 2284
+VDIVSSZrm_Int 2285
+VDIVSSZrmk_Int 2286
+VDIVSSZrmkz_Int 2287
+VDIVSSZrr 2288
+VDIVSSZrr_Int 2289
+VDIVSSZrrb_Int 2290
+VDIVSSZrrbk_Int 2291
+VDIVSSZrrbkz_Int 2292
+VDIVSSZrrk_Int 2293
+VDIVSSZrrkz_Int 2294
+VDIVSSrm 2295
+VDIVSSrm_Int 2296
+VDIVSSrr 2297
+VDIVSSrr_Int 2298
+VDPBF 2299
+VDPPDrmi 2300
+VDPPDrri 2301
+VDPPHPSZ 2302
+VDPPHPSZm 2303
+VDPPHPSZmb 2304
+VDPPHPSZmbk 2305
+VDPPHPSZmbkz 2306
+VDPPHPSZmk 2307
+VDPPHPSZmkz 2308
+VDPPHPSZr 2309
+VDPPHPSZrk 2310
+VDPPHPSZrkz 2311
+VDPPSYrmi 2312
+VDPPSYrri 2313
+VDPPSrmi 2314
+VDPPSrri 2315
+VERRm 2316
+VERRr 2317
+VERWm 2318
+VERWr 2319
+VEXP 2320
+VEXPANDPDZ 2321
+VEXPANDPDZrm 2322
+VEXPANDPDZrmk 2323
+VEXPANDPDZrmkz 2324
+VEXPANDPDZrr 2325
+VEXPANDPDZrrk 2326
+VEXPANDPDZrrkz 2327
+VEXPANDPSZ 2328
+VEXPANDPSZrm 2329
+VEXPANDPSZrmk 2330
+VEXPANDPSZrmkz 2331
+VEXPANDPSZrr 2332
+VEXPANDPSZrrk 2333
+VEXPANDPSZrrkz 2334
+VEXTRACTF 2335
+VEXTRACTI 2336
+VEXTRACTPSZmri 2337
+VEXTRACTPSZrri 2338
+VEXTRACTPSmri 2339
+VEXTRACTPSrri 2340
+VFCMADDCPHZ 2341
+VFCMADDCPHZm 2342
+VFCMADDCPHZmb 2343
+VFCMADDCPHZmbk 2344
+VFCMADDCPHZmbkz 2345
+VFCMADDCPHZmk 2346
+VFCMADDCPHZmkz 2347
+VFCMADDCPHZr 2348
+VFCMADDCPHZrb 2349
+VFCMADDCPHZrbk 2350
+VFCMADDCPHZrbkz 2351
+VFCMADDCPHZrk 2352
+VFCMADDCPHZrkz 2353
+VFCMADDCSHZm 2354
+VFCMADDCSHZmk 2355
+VFCMADDCSHZmkz 2356
+VFCMADDCSHZr 2357
+VFCMADDCSHZrb 2358
+VFCMADDCSHZrbk 2359
+VFCMADDCSHZrbkz 2360
+VFCMADDCSHZrk 2361
+VFCMADDCSHZrkz 2362
+VFCMULCPHZ 2363
+VFCMULCPHZrm 2364
+VFCMULCPHZrmb 2365
+VFCMULCPHZrmbk 2366
+VFCMULCPHZrmbkz 2367
+VFCMULCPHZrmk 2368
+VFCMULCPHZrmkz 2369
+VFCMULCPHZrr 2370
+VFCMULCPHZrrb 2371
+VFCMULCPHZrrbk 2372
+VFCMULCPHZrrbkz 2373
+VFCMULCPHZrrk 2374
+VFCMULCPHZrrkz 2375
+VFCMULCSHZrm 2376
+VFCMULCSHZrmk 2377
+VFCMULCSHZrmkz 2378
+VFCMULCSHZrr 2379
+VFCMULCSHZrrb 2380
+VFCMULCSHZrrbk 2381
+VFCMULCSHZrrbkz 2382
+VFCMULCSHZrrk 2383
+VFCMULCSHZrrkz 2384
+VFIXUPIMMPDZ 2385
+VFIXUPIMMPDZrmbi 2386
+VFIXUPIMMPDZrmbik 2387
+VFIXUPIMMPDZrmbikz 2388
+VFIXUPIMMPDZrmi 2389
+VFIXUPIMMPDZrmik 2390
+VFIXUPIMMPDZrmikz 2391
+VFIXUPIMMPDZrri 2392
+VFIXUPIMMPDZrrib 2393
+VFIXUPIMMPDZrribk 2394
+VFIXUPIMMPDZrribkz 2395
+VFIXUPIMMPDZrrik 2396
+VFIXUPIMMPDZrrikz 2397
+VFIXUPIMMPSZ 2398
+VFIXUPIMMPSZrmbi 2399
+VFIXUPIMMPSZrmbik 2400
+VFIXUPIMMPSZrmbikz 2401
+VFIXUPIMMPSZrmi 2402
+VFIXUPIMMPSZrmik 2403
+VFIXUPIMMPSZrmikz 2404
+VFIXUPIMMPSZrri 2405
+VFIXUPIMMPSZrrib 2406
+VFIXUPIMMPSZrribk 2407
+VFIXUPIMMPSZrribkz 2408
+VFIXUPIMMPSZrrik 2409
+VFIXUPIMMPSZrrikz 2410
+VFIXUPIMMSDZrmi 2411
+VFIXUPIMMSDZrmik 2412
+VFIXUPIMMSDZrmikz 2413
+VFIXUPIMMSDZrri 2414
+VFIXUPIMMSDZrrib 2415
+VFIXUPIMMSDZrribk 2416
+VFIXUPIMMSDZrribkz 2417
+VFIXUPIMMSDZrrik 2418
+VFIXUPIMMSDZrrikz 2419
+VFIXUPIMMSSZrmi 2420
+VFIXUPIMMSSZrmik 2421
+VFIXUPIMMSSZrmikz 2422
+VFIXUPIMMSSZrri 2423
+VFIXUPIMMSSZrrib 2424
+VFIXUPIMMSSZrribk 2425
+VFIXUPIMMSSZrribkz 2426
+VFIXUPIMMSSZrrik 2427
+VFIXUPIMMSSZrrikz 2428
+VFMADD 2429
+VFMADDCPHZ 2430
+VFMADDCPHZm 2431
+VFMADDCPHZmb 2432
+VFMADDCPHZmbk 2433
+VFMADDCPHZmbkz 2434
+VFMADDCPHZmk 2435
+VFMADDCPHZmkz 2436
+VFMADDCPHZr 2437
+VFMADDCPHZrb 2438
+VFMADDCPHZrbk 2439
+VFMADDCPHZrbkz 2440
+VFMADDCPHZrk 2441
+VFMADDCPHZrkz 2442
+VFMADDCSHZm 2443
+VFMADDCSHZmk 2444
+VFMADDCSHZmkz 2445
+VFMADDCSHZr 2446
+VFMADDCSHZrb 2447
+VFMADDCSHZrbk 2448
+VFMADDCSHZrbkz 2449
+VFMADDCSHZrk 2450
+VFMADDCSHZrkz 2451
+VFMADDPD 2452
+VFMADDPS 2453
+VFMADDSD 2454
+VFMADDSS 2455
+VFMADDSUB 2456
+VFMADDSUBPD 2457
+VFMADDSUBPS 2458
+VFMSUB 2459
+VFMSUBADD 2460
+VFMSUBADDPD 2461
+VFMSUBADDPS 2462
+VFMSUBPD 2463
+VFMSUBPS 2464
+VFMSUBSD 2465
+VFMSUBSS 2466
+VFMULCPHZ 2467
+VFMULCPHZrm 2468
+VFMULCPHZrmb 2469
+VFMULCPHZrmbk 2470
+VFMULCPHZrmbkz 2471
+VFMULCPHZrmk 2472
+VFMULCPHZrmkz 2473
+VFMULCPHZrr 2474
+VFMULCPHZrrb 2475
+VFMULCPHZrrbk 2476
+VFMULCPHZrrbkz 2477
+VFMULCPHZrrk 2478
+VFMULCPHZrrkz 2479
+VFMULCSHZrm 2480
+VFMULCSHZrmk 2481
+VFMULCSHZrmkz 2482
+VFMULCSHZrr 2483
+VFMULCSHZrrb 2484
+VFMULCSHZrrbk 2485
+VFMULCSHZrrbkz 2486
+VFMULCSHZrrk 2487
+VFMULCSHZrrkz 2488
+VFNMADD 2489
+VFNMADDPD 2490
+VFNMADDPS 2491
+VFNMADDSD 2492
+VFNMADDSS 2493
+VFNMSUB 2494
+VFNMSUBPD 2495
+VFNMSUBPS 2496
+VFNMSUBSD 2497
+VFNMSUBSS 2498
+VFPCLASSBF 2499
+VFPCLASSPDZ 2500
+VFPCLASSPDZmbi 2501
+VFPCLASSPDZmbik 2502
+VFPCLASSPDZmi 2503
+VFPCLASSPDZmik 2504
+VFPCLASSPDZri 2505
+VFPCLASSPDZrik 2506
+VFPCLASSPHZ 2507
+VFPCLASSPHZmbi 2508
+VFPCLASSPHZmbik 2509
+VFPCLASSPHZmi 2510
+VFPCLASSPHZmik 2511
+VFPCLASSPHZri 2512
+VFPCLASSPHZrik 2513
+VFPCLASSPSZ 2514
+VFPCLASSPSZmbi 2515
+VFPCLASSPSZmbik 2516
+VFPCLASSPSZmi 2517
+VFPCLASSPSZmik 2518
+VFPCLASSPSZri 2519
+VFPCLASSPSZrik 2520
+VFPCLASSSDZmi 2521
+VFPCLASSSDZmik 2522
+VFPCLASSSDZri 2523
+VFPCLASSSDZrik 2524
+VFPCLASSSHZmi 2525
+VFPCLASSSHZmik 2526
+VFPCLASSSHZri 2527
+VFPCLASSSHZrik 2528
+VFPCLASSSSZmi 2529
+VFPCLASSSSZmik 2530
+VFPCLASSSSZri 2531
+VFPCLASSSSZrik 2532
+VFRCZPDYrm 2533
+VFRCZPDYrr 2534
+VFRCZPDrm 2535
+VFRCZPDrr 2536
+VFRCZPSYrm 2537
+VFRCZPSYrr 2538
+VFRCZPSrm 2539
+VFRCZPSrr 2540
+VFRCZSDrm 2541
+VFRCZSDrr 2542
+VFRCZSSrm 2543
+VFRCZSSrr 2544
+VGATHERDPDYrm 2545
+VGATHERDPDZ 2546
+VGATHERDPDZrm 2547
+VGATHERDPDrm 2548
+VGATHERDPSYrm 2549
+VGATHERDPSZ 2550
+VGATHERDPSZrm 2551
+VGATHERDPSrm 2552
+VGATHERPF 2553
+VGATHERQPDYrm 2554
+VGATHERQPDZ 2555
+VGATHERQPDZrm 2556
+VGATHERQPDrm 2557
+VGATHERQPSYrm 2558
+VGATHERQPSZ 2559
+VGATHERQPSZrm 2560
+VGATHERQPSrm 2561
+VGETEXPBF 2562
+VGETEXPPDZ 2563
+VGETEXPPDZm 2564
+VGETEXPPDZmb 2565
+VGETEXPPDZmbk 2566
+VGETEXPPDZmbkz 2567
+VGETEXPPDZmk 2568
+VGETEXPPDZmkz 2569
+VGETEXPPDZr 2570
+VGETEXPPDZrb 2571
+VGETEXPPDZrbk 2572
+VGETEXPPDZrbkz 2573
+VGETEXPPDZrk 2574
+VGETEXPPDZrkz 2575
+VGETEXPPHZ 2576
+VGETEXPPHZm 2577
+VGETEXPPHZmb 2578
+VGETEXPPHZmbk 2579
+VGETEXPPHZmbkz 2580
+VGETEXPPHZmk 2581
+VGETEXPPHZmkz 2582
+VGETEXPPHZr 2583
+VGETEXPPHZrb 2584
+VGETEXPPHZrbk 2585
+VGETEXPPHZrbkz 2586
+VGETEXPPHZrk 2587
+VGETEXPPHZrkz 2588
+VGETEXPPSZ 2589
+VGETEXPPSZm 2590
+VGETEXPPSZmb 2591
+VGETEXPPSZmbk 2592
+VGETEXPPSZmbkz 2593
+VGETEXPPSZmk 2594
+VGETEXPPSZmkz 2595
+VGETEXPPSZr 2596
+VGETEXPPSZrb 2597
+VGETEXPPSZrbk 2598
+VGETEXPPSZrbkz 2599
+VGETEXPPSZrk 2600
+VGETEXPPSZrkz 2601
+VGETEXPSDZm 2602
+VGETEXPSDZmk 2603
+VGETEXPSDZmkz 2604
+VGETEXPSDZr 2605
+VGETEXPSDZrb 2606
+VGETEXPSDZrbk 2607
+VGETEXPSDZrbkz 2608
+VGETEXPSDZrk 2609
+VGETEXPSDZrkz 2610
+VGETEXPSHZm 2611
+VGETEXPSHZmk 2612
+VGETEXPSHZmkz 2613
+VGETEXPSHZr 2614
+VGETEXPSHZrb 2615
+VGETEXPSHZrbk 2616
+VGETEXPSHZrbkz 2617
+VGETEXPSHZrk 2618
+VGETEXPSHZrkz 2619
+VGETEXPSSZm 2620
+VGETEXPSSZmk 2621
+VGETEXPSSZmkz 2622
+VGETEXPSSZr 2623
+VGETEXPSSZrb 2624
+VGETEXPSSZrbk 2625
+VGETEXPSSZrbkz 2626
+VGETEXPSSZrk 2627
+VGETEXPSSZrkz 2628
+VGETMANTBF 2629
+VGETMANTPDZ 2630
+VGETMANTPDZrmbi 2631
+VGETMANTPDZrmbik 2632
+VGETMANTPDZrmbikz 2633
+VGETMANTPDZrmi 2634
+VGETMANTPDZrmik 2635
+VGETMANTPDZrmikz 2636
+VGETMANTPDZrri 2637
+VGETMANTPDZrrib 2638
+VGETMANTPDZrribk 2639
+VGETMANTPDZrribkz 2640
+VGETMANTPDZrrik 2641
+VGETMANTPDZrrikz 2642
+VGETMANTPHZ 2643
+VGETMANTPHZrmbi 2644
+VGETMANTPHZrmbik 2645
+VGETMANTPHZrmbikz 2646
+VGETMANTPHZrmi 2647
+VGETMANTPHZrmik 2648
+VGETMANTPHZrmikz 2649
+VGETMANTPHZrri 2650
+VGETMANTPHZrrib 2651
+VGETMANTPHZrribk 2652
+VGETMANTPHZrribkz 2653
+VGETMANTPHZrrik 2654
+VGETMANTPHZrrikz 2655
+VGETMANTPSZ 2656
+VGETMANTPSZrmbi 2657
+VGETMANTPSZrmbik 2658
+VGETMANTPSZrmbikz 2659
+VGETMANTPSZrmi 2660
+VGETMANTPSZrmik 2661
+VGETMANTPSZrmikz 2662
+VGETMANTPSZrri 2663
+VGETMANTPSZrrib 2664
+VGETMANTPSZrribk 2665
+VGETMANTPSZrribkz 2666
+VGETMANTPSZrrik 2667
+VGETMANTPSZrrikz 2668
+VGETMANTSDZrmi 2669
+VGETMANTSDZrmik 2670
+VGETMANTSDZrmikz 2671
+VGETMANTSDZrri 2672
+VGETMANTSDZrrib 2673
+VGETMANTSDZrribk 2674
+VGETMANTSDZrribkz 2675
+VGETMANTSDZrrik 2676
+VGETMANTSDZrrikz 2677
+VGETMANTSHZrmi 2678
+VGETMANTSHZrmik 2679
+VGETMANTSHZrmikz 2680
+VGETMANTSHZrri 2681
+VGETMANTSHZrrib 2682
+VGETMANTSHZrribk 2683
+VGETMANTSHZrribkz 2684
+VGETMANTSHZrrik 2685
+VGETMANTSHZrrikz 2686
+VGETMANTSSZrmi 2687
+VGETMANTSSZrmik 2688
+VGETMANTSSZrmikz 2689
+VGETMANTSSZrri 2690
+VGETMANTSSZrrib 2691
+VGETMANTSSZrribk 2692
+VGETMANTSSZrribkz 2693
+VGETMANTSSZrrik 2694
+VGETMANTSSZrrikz 2695
+VGF 2696
+VHADDPDYrm 2697
+VHADDPDYrr 2698
+VHADDPDrm 2699
+VHADDPDrr 2700
+VHADDPSYrm 2701
+VHADDPSYrr 2702
+VHADDPSrm 2703
+VHADDPSrr 2704
+VHSUBPDYrm 2705
+VHSUBPDYrr 2706
+VHSUBPDrm 2707
+VHSUBPDrr 2708
+VHSUBPSYrm 2709
+VHSUBPSYrr 2710
+VHSUBPSrm 2711
+VHSUBPSrr 2712
+VINSERTF 2713
+VINSERTI 2714
+VINSERTPSZrmi 2715
+VINSERTPSZrri 2716
+VINSERTPSrmi 2717
+VINSERTPSrri 2718
+VLDDQUYrm 2719
+VLDDQUrm 2720
+VLDMXCSR 2721
+VMASKMOVDQU 2722
+VMASKMOVPDYmr 2723
+VMASKMOVPDYrm 2724
+VMASKMOVPDmr 2725
+VMASKMOVPDrm 2726
+VMASKMOVPSYmr 2727
+VMASKMOVPSYrm 2728
+VMASKMOVPSmr 2729
+VMASKMOVPSrm 2730
+VMAXBF 2731
+VMAXCPDYrm 2732
+VMAXCPDYrr 2733
+VMAXCPDZ 2734
+VMAXCPDZrm 2735
+VMAXCPDZrmb 2736
+VMAXCPDZrmbk 2737
+VMAXCPDZrmbkz 2738
+VMAXCPDZrmk 2739
+VMAXCPDZrmkz 2740
+VMAXCPDZrr 2741
+VMAXCPDZrrk 2742
+VMAXCPDZrrkz 2743
+VMAXCPDrm 2744
+VMAXCPDrr 2745
+VMAXCPHZ 2746
+VMAXCPHZrm 2747
+VMAXCPHZrmb 2748
+VMAXCPHZrmbk 2749
+VMAXCPHZrmbkz 2750
+VMAXCPHZrmk 2751
+VMAXCPHZrmkz 2752
+VMAXCPHZrr 2753
+VMAXCPHZrrk 2754
+VMAXCPHZrrkz 2755
+VMAXCPSYrm 2756
+VMAXCPSYrr 2757
+VMAXCPSZ 2758
+VMAXCPSZrm 2759
+VMAXCPSZrmb 2760
+VMAXCPSZrmbk 2761
+VMAXCPSZrmbkz 2762
+VMAXCPSZrmk 2763
+VMAXCPSZrmkz 2764
+VMAXCPSZrr 2765
+VMAXCPSZrrk 2766
+VMAXCPSZrrkz 2767
+VMAXCPSrm 2768
+VMAXCPSrr 2769
+VMAXCSDZrm 2770
+VMAXCSDZrr 2771
+VMAXCSDrm 2772
+VMAXCSDrr 2773
+VMAXCSHZrm 2774
+VMAXCSHZrr 2775
+VMAXCSSZrm 2776
+VMAXCSSZrr 2777
+VMAXCSSrm 2778
+VMAXCSSrr 2779
+VMAXPDYrm 2780
+VMAXPDYrr 2781
+VMAXPDZ 2782
+VMAXPDZrm 2783
+VMAXPDZrmb 2784
+VMAXPDZrmbk 2785
+VMAXPDZrmbkz 2786
+VMAXPDZrmk 2787
+VMAXPDZrmkz 2788
+VMAXPDZrr 2789
+VMAXPDZrrb 2790
+VMAXPDZrrbk 2791
+VMAXPDZrrbkz 2792
+VMAXPDZrrk 2793
+VMAXPDZrrkz 2794
+VMAXPDrm 2795
+VMAXPDrr 2796
+VMAXPHZ 2797
+VMAXPHZrm 2798
+VMAXPHZrmb 2799
+VMAXPHZrmbk 2800
+VMAXPHZrmbkz 2801
+VMAXPHZrmk 2802
+VMAXPHZrmkz 2803
+VMAXPHZrr 2804
+VMAXPHZrrb 2805
+VMAXPHZrrbk 2806
+VMAXPHZrrbkz 2807
+VMAXPHZrrk 2808
+VMAXPHZrrkz 2809
+VMAXPSYrm 2810
+VMAXPSYrr 2811
+VMAXPSZ 2812
+VMAXPSZrm 2813
+VMAXPSZrmb 2814
+VMAXPSZrmbk 2815
+VMAXPSZrmbkz 2816
+VMAXPSZrmk 2817
+VMAXPSZrmkz 2818
+VMAXPSZrr 2819
+VMAXPSZrrb 2820
+VMAXPSZrrbk 2821
+VMAXPSZrrbkz 2822
+VMAXPSZrrk 2823
+VMAXPSZrrkz 2824
+VMAXPSrm 2825
+VMAXPSrr 2826
+VMAXSDZrm 2827
+VMAXSDZrm_Int 2828
+VMAXSDZrmk_Int 2829
+VMAXSDZrmkz_Int 2830
+VMAXSDZrr 2831
+VMAXSDZrr_Int 2832
+VMAXSDZrrb_Int 2833
+VMAXSDZrrbk_Int 2834
+VMAXSDZrrbkz_Int 2835
+VMAXSDZrrk_Int 2836
+VMAXSDZrrkz_Int 2837
+VMAXSDrm 2838
+VMAXSDrm_Int 2839
+VMAXSDrr 2840
+VMAXSDrr_Int 2841
+VMAXSHZrm 2842
+VMAXSHZrm_Int 2843
+VMAXSHZrmk_Int 2844
+VMAXSHZrmkz_Int 2845
+VMAXSHZrr 2846
+VMAXSHZrr_Int 2847
+VMAXSHZrrb_Int 2848
+VMAXSHZrrbk_Int 2849
+VMAXSHZrrbkz_Int 2850
+VMAXSHZrrk_Int 2851
+VMAXSHZrrkz_Int 2852
+VMAXSSZrm 2853
+VMAXSSZrm_Int 2854
+VMAXSSZrmk_Int 2855
+VMAXSSZrmkz_Int 2856
+VMAXSSZrr 2857
+VMAXSSZrr_Int 2858
+VMAXSSZrrb_Int 2859
+VMAXSSZrrbk_Int 2860
+VMAXSSZrrbkz_Int 2861
+VMAXSSZrrk_Int 2862
+VMAXSSZrrkz_Int 2863
+VMAXSSrm 2864
+VMAXSSrm_Int 2865
+VMAXSSrr 2866
+VMAXSSrr_Int 2867
+VMCALL 2868
+VMCLEARm 2869
+VMFUNC 2870
+VMINBF 2871
+VMINCPDYrm 2872
+VMINCPDYrr 2873
+VMINCPDZ 2874
+VMINCPDZrm 2875
+VMINCPDZrmb 2876
+VMINCPDZrmbk 2877
+VMINCPDZrmbkz 2878
+VMINCPDZrmk 2879
+VMINCPDZrmkz 2880
+VMINCPDZrr 2881
+VMINCPDZrrk 2882
+VMINCPDZrrkz 2883
+VMINCPDrm 2884
+VMINCPDrr 2885
+VMINCPHZ 2886
+VMINCPHZrm 2887
+VMINCPHZrmb 2888
+VMINCPHZrmbk 2889
+VMINCPHZrmbkz 2890
+VMINCPHZrmk 2891
+VMINCPHZrmkz 2892
+VMINCPHZrr 2893
+VMINCPHZrrk 2894
+VMINCPHZrrkz 2895
+VMINCPSYrm 2896
+VMINCPSYrr 2897
+VMINCPSZ 2898
+VMINCPSZrm 2899
+VMINCPSZrmb 2900
+VMINCPSZrmbk 2901
+VMINCPSZrmbkz 2902
+VMINCPSZrmk 2903
+VMINCPSZrmkz 2904
+VMINCPSZrr 2905
+VMINCPSZrrk 2906
+VMINCPSZrrkz 2907
+VMINCPSrm 2908
+VMINCPSrr 2909
+VMINCSDZrm 2910
+VMINCSDZrr 2911
+VMINCSDrm 2912
+VMINCSDrr 2913
+VMINCSHZrm 2914
+VMINCSHZrr 2915
+VMINCSSZrm 2916
+VMINCSSZrr 2917
+VMINCSSrm 2918
+VMINCSSrr 2919
+VMINMAXBF 2920
+VMINMAXPDZ 2921
+VMINMAXPDZrmbi 2922
+VMINMAXPDZrmbik 2923
+VMINMAXPDZrmbikz 2924
+VMINMAXPDZrmi 2925
+VMINMAXPDZrmik 2926
+VMINMAXPDZrmikz 2927
+VMINMAXPDZrri 2928
+VMINMAXPDZrrib 2929
+VMINMAXPDZrribk 2930
+VMINMAXPDZrribkz 2931
+VMINMAXPDZrrik 2932
+VMINMAXPDZrrikz 2933
+VMINMAXPHZ 2934
+VMINMAXPHZrmbi 2935
+VMINMAXPHZrmbik 2936
+VMINMAXPHZrmbikz 2937
+VMINMAXPHZrmi 2938
+VMINMAXPHZrmik 2939
+VMINMAXPHZrmikz 2940
+VMINMAXPHZrri 2941
+VMINMAXPHZrrib 2942
+VMINMAXPHZrribk 2943
+VMINMAXPHZrribkz 2944
+VMINMAXPHZrrik 2945
+VMINMAXPHZrrikz 2946
+VMINMAXPSZ 2947
+VMINMAXPSZrmbi 2948
+VMINMAXPSZrmbik 2949
+VMINMAXPSZrmbikz 2950
+VMINMAXPSZrmi 2951
+VMINMAXPSZrmik 2952
+VMINMAXPSZrmikz 2953
+VMINMAXPSZrri 2954
+VMINMAXPSZrrib 2955
+VMINMAXPSZrribk 2956
+VMINMAXPSZrribkz 2957
+VMINMAXPSZrrik 2958
+VMINMAXPSZrrikz 2959
+VMINMAXSDrmi 2960
+VMINMAXSDrmi_Int 2961
+VMINMAXSDrmik_Int 2962
+VMINMAXSDrmikz_Int 2963
+VMINMAXSDrri 2964
+VMINMAXSDrri_Int 2965
+VMINMAXSDrrib_Int 2966
+VMINMAXSDrribk_Int 2967
+VMINMAXSDrribkz_Int 2968
+VMINMAXSDrrik_Int 2969
+VMINMAXSDrrikz_Int 2970
+VMINMAXSHrmi 2971
+VMINMAXSHrmi_Int 2972
+VMINMAXSHrmik_Int 2973
+VMINMAXSHrmikz_Int 2974
+VMINMAXSHrri 2975
+VMINMAXSHrri_Int 2976
+VMINMAXSHrrib_Int 2977
+VMINMAXSHrribk_Int 2978
+VMINMAXSHrribkz_Int 2979
+VMINMAXSHrrik_Int 2980
+VMINMAXSHrrikz_Int 2981
+VMINMAXSSrmi 2982
+VMINMAXSSrmi_Int 2983
+VMINMAXSSrmik_Int 2984
+VMINMAXSSrmikz_Int 2985
+VMINMAXSSrri 2986
+VMINMAXSSrri_Int 2987
+VMINMAXSSrrib_Int 2988
+VMINMAXSSrribk_Int 2989
+VMINMAXSSrribkz_Int 2990
+VMINMAXSSrrik_Int 2991
+VMINMAXSSrrikz_Int 2992
+VMINPDYrm 2993
+VMINPDYrr 2994
+VMINPDZ 2995
+VMINPDZrm 2996
+VMINPDZrmb 2997
+VMINPDZrmbk 2998
+VMINPDZrmbkz 2999
+VMINPDZrmk 3000
+VMINPDZrmkz 3001
+VMINPDZrr 3002
+VMINPDZrrb 3003
+VMINPDZrrbk 3004
+VMINPDZrrbkz 3005
+VMINPDZrrk 3006
+VMINPDZrrkz 3007
+VMINPDrm 3008
+VMINPDrr 3009
+VMINPHZ 3010
+VMINPHZrm 3011
+VMINPHZrmb 3012
+VMINPHZrmbk 3013
+VMINPHZrmbkz 3014
+VMINPHZrmk 3015
+VMINPHZrmkz 3016
+VMINPHZrr 3017
+VMINPHZrrb 3018
+VMINPHZrrbk 3019
+VMINPHZrrbkz 3020
+VMINPHZrrk 3021
+VMINPHZrrkz 3022
+VMINPSYrm 3023
+VMINPSYrr 3024
+VMINPSZ 3025
+VMINPSZrm 3026
+VMINPSZrmb 3027
+VMINPSZrmbk 3028
+VMINPSZrmbkz 3029
+VMINPSZrmk 3030
+VMINPSZrmkz 3031
+VMINPSZrr 3032
+VMINPSZrrb 3033
+VMINPSZrrbk 3034
+VMINPSZrrbkz 3035
+VMINPSZrrk 3036
+VMINPSZrrkz 3037
+VMINPSrm 3038
+VMINPSrr 3039
+VMINSDZrm 3040
+VMINSDZrm_Int 3041
+VMINSDZrmk_Int 3042
+VMINSDZrmkz_Int 3043
+VMINSDZrr 3044
+VMINSDZrr_Int 3045
+VMINSDZrrb_Int 3046
+VMINSDZrrbk_Int 3047
+VMINSDZrrbkz_Int 3048
+VMINSDZrrk_Int 3049
+VMINSDZrrkz_Int 3050
+VMINSDrm 3051
+VMINSDrm_Int 3052
+VMINSDrr 3053
+VMINSDrr_Int 3054
+VMINSHZrm 3055
+VMINSHZrm_Int 3056
+VMINSHZrmk_Int 3057
+VMINSHZrmkz_Int 3058
+VMINSHZrr 3059
+VMINSHZrr_Int 3060
+VMINSHZrrb_Int 3061
+VMINSHZrrbk_Int 3062
+VMINSHZrrbkz_Int 3063
+VMINSHZrrk_Int 3064
+VMINSHZrrkz_Int 3065
+VMINSSZrm 3066
+VMINSSZrm_Int 3067
+VMINSSZrmk_Int 3068
+VMINSSZrmkz_Int 3069
+VMINSSZrr 3070
+VMINSSZrr_Int 3071
+VMINSSZrrb_Int 3072
+VMINSSZrrbk_Int 3073
+VMINSSZrrbkz_Int 3074
+VMINSSZrrk_Int 3075
+VMINSSZrrkz_Int 3076
+VMINSSrm 3077
+VMINSSrm_Int 3078
+VMINSSrr 3079
+VMINSSrr_Int 3080
+VMLAUNCH 3081
+VMLOAD 3082
+VMMCALL 3083
+VMOV 3084
+VMOVAPDYmr 3085
+VMOVAPDYrm 3086
+VMOVAPDYrr 3087
+VMOVAPDYrr_REV 3088
+VMOVAPDZ 3089
+VMOVAPDZmr 3090
+VMOVAPDZmrk 3091
+VMOVAPDZrm 3092
+VMOVAPDZrmk 3093
+VMOVAPDZrmkz 3094
+VMOVAPDZrr 3095
+VMOVAPDZrr_REV 3096
+VMOVAPDZrrk 3097
+VMOVAPDZrrk_REV 3098
+VMOVAPDZrrkz 3099
+VMOVAPDZrrkz_REV 3100
+VMOVAPDmr 3101
+VMOVAPDrm 3102
+VMOVAPDrr 3103
+VMOVAPDrr_REV 3104
+VMOVAPSYmr 3105
+VMOVAPSYrm 3106
+VMOVAPSYrr 3107
+VMOVAPSYrr_REV 3108
+VMOVAPSZ 3109
+VMOVAPSZmr 3110
+VMOVAPSZmrk 3111
+VMOVAPSZrm 3112
+VMOVAPSZrmk 3113
+VMOVAPSZrmkz 3114
+VMOVAPSZrr 3115
+VMOVAPSZrr_REV 3116
+VMOVAPSZrrk 3117
+VMOVAPSZrrk_REV 3118
+VMOVAPSZrrkz 3119
+VMOVAPSZrrkz_REV 3120
+VMOVAPSmr 3121
+VMOVAPSrm 3122
+VMOVAPSrr 3123
+VMOVAPSrr_REV 3124
+VMOVDDUPYrm 3125
+VMOVDDUPYrr 3126
+VMOVDDUPZ 3127
+VMOVDDUPZrm 3128
+VMOVDDUPZrmk 3129
+VMOVDDUPZrmkz 3130
+VMOVDDUPZrr 3131
+VMOVDDUPZrrk 3132
+VMOVDDUPZrrkz 3133
+VMOVDDUPrm 3134
+VMOVDDUPrr 3135
+VMOVDI 3136
+VMOVDQA 3137
+VMOVDQAYmr 3138
+VMOVDQAYrm 3139
+VMOVDQAYrr 3140
+VMOVDQAYrr_REV 3141
+VMOVDQAmr 3142
+VMOVDQArm 3143
+VMOVDQArr 3144
+VMOVDQArr_REV 3145
+VMOVDQU 3146
+VMOVDQUYmr 3147
+VMOVDQUYrm 3148
+VMOVDQUYrr 3149
+VMOVDQUYrr_REV 3150
+VMOVDQUmr 3151
+VMOVDQUrm 3152
+VMOVDQUrr 3153
+VMOVDQUrr_REV 3154
+VMOVHLPSZrr 3155
+VMOVHLPSrr 3156
+VMOVHPDZ 3157
+VMOVHPDmr 3158
+VMOVHPDrm 3159
+VMOVHPSZ 3160
+VMOVHPSmr 3161
+VMOVHPSrm 3162
+VMOVLHPSZrr 3163
+VMOVLHPSrr 3164
+VMOVLPDZ 3165
+VMOVLPDmr 3166
+VMOVLPDrm 3167
+VMOVLPSZ 3168
+VMOVLPSmr 3169
+VMOVLPSrm 3170
+VMOVMSKPDYrr 3171
+VMOVMSKPDrr 3172
+VMOVMSKPSYrr 3173
+VMOVMSKPSrr 3174
+VMOVNTDQAYrm 3175
+VMOVNTDQAZ 3176
+VMOVNTDQAZrm 3177
+VMOVNTDQArm 3178
+VMOVNTDQYmr 3179
+VMOVNTDQZ 3180
+VMOVNTDQZmr 3181
+VMOVNTDQmr 3182
+VMOVNTPDYmr 3183
+VMOVNTPDZ 3184
+VMOVNTPDZmr 3185
+VMOVNTPDmr 3186
+VMOVNTPSYmr 3187
+VMOVNTPSZ 3188
+VMOVNTPSZmr 3189
+VMOVNTPSmr 3190
+VMOVPDI 3191
+VMOVPQI 3192
+VMOVPQIto 3193
+VMOVQI 3194
+VMOVRSBZ 3195
+VMOVRSBZm 3196
+VMOVRSBZmk 3197
+VMOVRSBZmkz 3198
+VMOVRSDZ 3199
+VMOVRSDZm 3200
+VMOVRSDZmk 3201
+VMOVRSDZmkz 3202
+VMOVRSQZ 3203
+VMOVRSQZm 3204
+VMOVRSQZmk 3205
+VMOVRSQZmkz 3206
+VMOVRSWZ 3207
+VMOVRSWZm 3208
+VMOVRSWZmk 3209
+VMOVRSWZmkz 3210
+VMOVSDZmr 3211
+VMOVSDZmrk 3212
+VMOVSDZrm 3213
+VMOVSDZrm_alt 3214
+VMOVSDZrmk 3215
+VMOVSDZrmkz 3216
+VMOVSDZrr 3217
+VMOVSDZrr_REV 3218
+VMOVSDZrrk 3219
+VMOVSDZrrk_REV 3220
+VMOVSDZrrkz 3221
+VMOVSDZrrkz_REV 3222
+VMOVSDmr 3223
+VMOVSDrm 3224
+VMOVSDrm_alt 3225
+VMOVSDrr 3226
+VMOVSDrr_REV 3227
+VMOVSDto 3228
+VMOVSH 3229
+VMOVSHDUPYrm 3230
+VMOVSHDUPYrr 3231
+VMOVSHDUPZ 3232
+VMOVSHDUPZrm 3233
+VMOVSHDUPZrmk 3234
+VMOVSHDUPZrmkz 3235
+VMOVSHDUPZrr 3236
+VMOVSHDUPZrrk 3237
+VMOVSHDUPZrrkz 3238
+VMOVSHDUPrm 3239
+VMOVSHDUPrr 3240
+VMOVSHZmr 3241
+VMOVSHZmrk 3242
+VMOVSHZrm 3243
+VMOVSHZrm_alt 3244
+VMOVSHZrmk 3245
+VMOVSHZrmkz 3246
+VMOVSHZrr 3247
+VMOVSHZrr_REV 3248
+VMOVSHZrrk 3249
+VMOVSHZrrk_REV 3250
+VMOVSHZrrkz 3251
+VMOVSHZrrkz_REV 3252
+VMOVSHtoW 3253
+VMOVSLDUPYrm 3254
+VMOVSLDUPYrr 3255
+VMOVSLDUPZ 3256
+VMOVSLDUPZrm 3257
+VMOVSLDUPZrmk 3258
+VMOVSLDUPZrmkz 3259
+VMOVSLDUPZrr 3260
+VMOVSLDUPZrrk 3261
+VMOVSLDUPZrrkz 3262
+VMOVSLDUPrm 3263
+VMOVSLDUPrr 3264
+VMOVSS 3265
+VMOVSSZmr 3266
+VMOVSSZmrk 3267
+VMOVSSZrm 3268
+VMOVSSZrm_alt 3269
+VMOVSSZrmk 3270
+VMOVSSZrmkz 3271
+VMOVSSZrr 3272
+VMOVSSZrr_REV 3273
+VMOVSSZrrk 3274
+VMOVSSZrrk_REV 3275
+VMOVSSZrrkz 3276
+VMOVSSZrrkz_REV 3277
+VMOVSSmr 3278
+VMOVSSrm 3279
+VMOVSSrm_alt 3280
+VMOVSSrr 3281
+VMOVSSrr_REV 3282
+VMOVUPDYmr 3283
+VMOVUPDYrm 3284
+VMOVUPDYrr 3285
+VMOVUPDYrr_REV 3286
+VMOVUPDZ 3287
+VMOVUPDZmr 3288
+VMOVUPDZmrk 3289
+VMOVUPDZrm 3290
+VMOVUPDZrmk 3291
+VMOVUPDZrmkz 3292
+VMOVUPDZrr 3293
+VMOVUPDZrr_REV 3294
+VMOVUPDZrrk 3295
+VMOVUPDZrrk_REV 3296
+VMOVUPDZrrkz 3297
+VMOVUPDZrrkz_REV 3298
+VMOVUPDmr 3299
+VMOVUPDrm 3300
+VMOVUPDrr 3301
+VMOVUPDrr_REV 3302
+VMOVUPSYmr 3303
+VMOVUPSYrm 3304
+VMOVUPSYrr 3305
+VMOVUPSYrr_REV 3306
+VMOVUPSZ 3307
+VMOVUPSZmr 3308
+VMOVUPSZmrk 3309
+VMOVUPSZrm 3310
+VMOVUPSZrmk 3311
+VMOVUPSZrmkz 3312
+VMOVUPSZrr 3313
+VMOVUPSZrr_REV 3314
+VMOVUPSZrrk 3315
+VMOVUPSZrrk_REV 3316
+VMOVUPSZrrkz 3317
+VMOVUPSZrrkz_REV 3318
+VMOVUPSmr 3319
+VMOVUPSrm 3320
+VMOVUPSrr 3321
+VMOVUPSrr_REV 3322
+VMOVW 3323
+VMOVWmr 3324
+VMOVWrm 3325
+VMOVZPDILo 3326
+VMOVZPQILo 3327
+VMOVZPWILo 3328
+VMPSADBWYrmi 3329
+VMPSADBWYrri 3330
+VMPSADBWZ 3331
+VMPSADBWZrmi 3332
+VMPSADBWZrmik 3333
+VMPSADBWZrmikz 3334
+VMPSADBWZrri 3335
+VMPSADBWZrrik 3336
+VMPSADBWZrrikz 3337
+VMPSADBWrmi 3338
+VMPSADBWrri 3339
+VMPTRLDm 3340
+VMPTRSTm 3341
+VMREAD 3342
+VMRESUME 3343
+VMRUN 3344
+VMSAVE 3345
+VMULBF 3346
+VMULPDYrm 3347
+VMULPDYrr 3348
+VMULPDZ 3349
+VMULPDZrm 3350
+VMULPDZrmb 3351
+VMULPDZrmbk 3352
+VMULPDZrmbkz 3353
+VMULPDZrmk 3354
+VMULPDZrmkz 3355
+VMULPDZrr 3356
+VMULPDZrrb 3357
+VMULPDZrrbk 3358
+VMULPDZrrbkz 3359
+VMULPDZrrk 3360
+VMULPDZrrkz 3361
+VMULPDrm 3362
+VMULPDrr 3363
+VMULPHZ 3364
+VMULPHZrm 3365
+VMULPHZrmb 3366
+VMULPHZrmbk 3367
+VMULPHZrmbkz 3368
+VMULPHZrmk 3369
+VMULPHZrmkz 3370
+VMULPHZrr 3371
+VMULPHZrrb 3372
+VMULPHZrrbk 3373
+VMULPHZrrbkz 3374
+VMULPHZrrk 3375
+VMULPHZrrkz 3376
+VMULPSYrm 3377
+VMULPSYrr 3378
+VMULPSZ 3379
+VMULPSZrm 3380
+VMULPSZrmb 3381
+VMULPSZrmbk 3382
+VMULPSZrmbkz 3383
+VMULPSZrmk 3384
+VMULPSZrmkz 3385
+VMULPSZrr 3386
+VMULPSZrrb 3387
+VMULPSZrrbk 3388
+VMULPSZrrbkz 3389
+VMULPSZrrk 3390
+VMULPSZrrkz 3391
+VMULPSrm 3392
+VMULPSrr 3393
+VMULSDZrm 3394
+VMULSDZrm_Int 3395
+VMULSDZrmk_Int 3396
+VMULSDZrmkz_Int 3397
+VMULSDZrr 3398
+VMULSDZrr_Int 3399
+VMULSDZrrb_Int 3400
+VMULSDZrrbk_Int 3401
+VMULSDZrrbkz_Int 3402
+VMULSDZrrk_Int 3403
+VMULSDZrrkz_Int 3404
+VMULSDrm 3405
+VMULSDrm_Int 3406
+VMULSDrr 3407
+VMULSDrr_Int 3408
+VMULSHZrm 3409
+VMULSHZrm_Int 3410
+VMULSHZrmk_Int 3411
+VMULSHZrmkz_Int 3412
+VMULSHZrr 3413
+VMULSHZrr_Int 3414
+VMULSHZrrb_Int 3415
+VMULSHZrrbk_Int 3416
+VMULSHZrrbkz_Int 3417
+VMULSHZrrk_Int 3418
+VMULSHZrrkz_Int 3419
+VMULSSZrm 3420
+VMULSSZrm_Int 3421
+VMULSSZrmk_Int 3422
+VMULSSZrmkz_Int 3423
+VMULSSZrr 3424
+VMULSSZrr_Int 3425
+VMULSSZrrb_Int 3426
+VMULSSZrrbk_Int 3427
+VMULSSZrrbkz_Int 3428
+VMULSSZrrk_Int 3429
+VMULSSZrrkz_Int 3430
+VMULSSrm 3431
+VMULSSrm_Int 3432
+VMULSSrr 3433
+VMULSSrr_Int 3434
+VMWRITE 3435
+VMXOFF 3436
+VMXON 3437
+VORPDYrm 3438
+VORPDYrr 3439
+VORPDZ 3440
+VORPDZrm 3441
+VORPDZrmb 3442
+VORPDZrmbk 3443
+VORPDZrmbkz 3444
+VORPDZrmk 3445
+VORPDZrmkz 3446
+VORPDZrr 3447
+VORPDZrrk 3448
+VORPDZrrkz 3449
+VORPDrm 3450
+VORPDrr 3451
+VORPSYrm 3452
+VORPSYrr 3453
+VORPSZ 3454
+VORPSZrm 3455
+VORPSZrmb 3456
+VORPSZrmbk 3457
+VORPSZrmbkz 3458
+VORPSZrmk 3459
+VORPSZrmkz 3460
+VORPSZrr 3461
+VORPSZrrk 3462
+VORPSZrrkz 3463
+VORPSrm 3464
+VORPSrr 3465
+VP 3466
+VPABSBYrm 3467
+VPABSBYrr 3468
+VPABSBZ 3469
+VPABSBZrm 3470
+VPABSBZrmk 3471
+VPABSBZrmkz 3472
+VPABSBZrr 3473
+VPABSBZrrk 3474
+VPABSBZrrkz 3475
+VPABSBrm 3476
+VPABSBrr 3477
+VPABSDYrm 3478
+VPABSDYrr 3479
+VPABSDZ 3480
+VPABSDZrm 3481
+VPABSDZrmb 3482
+VPABSDZrmbk 3483
+VPABSDZrmbkz 3484
+VPABSDZrmk 3485
+VPABSDZrmkz 3486
+VPABSDZrr 3487
+VPABSDZrrk 3488
+VPABSDZrrkz 3489
+VPABSDrm 3490
+VPABSDrr 3491
+VPABSQZ 3492
+VPABSQZrm 3493
+VPABSQZrmb 3494
+VPABSQZrmbk 3495
+VPABSQZrmbkz 3496
+VPABSQZrmk 3497
+VPABSQZrmkz 3498
+VPABSQZrr 3499
+VPABSQZrrk 3500
+VPABSQZrrkz 3501
+VPABSWYrm 3502
+VPABSWYrr 3503
+VPABSWZ 3504
+VPABSWZrm 3505
+VPABSWZrmk 3506
+VPABSWZrmkz 3507
+VPABSWZrr 3508
+VPABSWZrrk 3509
+VPABSWZrrkz 3510
+VPABSWrm 3511
+VPABSWrr 3512
+VPACKSSDWYrm 3513
+VPACKSSDWYrr 3514
+VPACKSSDWZ 3515
+VPACKSSDWZrm 3516
+VPACKSSDWZrmb 3517
+VPACKSSDWZrmbk 3518
+VPACKSSDWZrmbkz 3519
+VPACKSSDWZrmk 3520
+VPACKSSDWZrmkz 3521
+VPACKSSDWZrr 3522
+VPACKSSDWZrrk 3523
+VPACKSSDWZrrkz 3524
+VPACKSSDWrm 3525
+VPACKSSDWrr 3526
+VPACKSSWBYrm 3527
+VPACKSSWBYrr 3528
+VPACKSSWBZ 3529
+VPACKSSWBZrm 3530
+VPACKSSWBZrmk 3531
+VPACKSSWBZrmkz 3532
+VPACKSSWBZrr 3533
+VPACKSSWBZrrk 3534
+VPACKSSWBZrrkz 3535
+VPACKSSWBrm 3536
+VPACKSSWBrr 3537
+VPACKUSDWYrm 3538
+VPACKUSDWYrr 3539
+VPACKUSDWZ 3540
+VPACKUSDWZrm 3541
+VPACKUSDWZrmb 3542
+VPACKUSDWZrmbk 3543
+VPACKUSDWZrmbkz 3544
+VPACKUSDWZrmk 3545
+VPACKUSDWZrmkz 3546
+VPACKUSDWZrr 3547
+VPACKUSDWZrrk 3548
+VPACKUSDWZrrkz 3549
+VPACKUSDWrm 3550
+VPACKUSDWrr 3551
+VPACKUSWBYrm 3552
+VPACKUSWBYrr 3553
+VPACKUSWBZ 3554
+VPACKUSWBZrm 3555
+VPACKUSWBZrmk 3556
+VPACKUSWBZrmkz 3557
+VPACKUSWBZrr 3558
+VPACKUSWBZrrk 3559
+VPACKUSWBZrrkz 3560
+VPACKUSWBrm 3561
+VPACKUSWBrr 3562
+VPADDBYrm 3563
+VPADDBYrr 3564
+VPADDBZ 3565
+VPADDBZrm 3566
+VPADDBZrmk 3567
+VPADDBZrmkz 3568
+VPADDBZrr 3569
+VPADDBZrrk 3570
+VPADDBZrrkz 3571
+VPADDBrm 3572
+VPADDBrr 3573
+VPADDDYrm 3574
+VPADDDYrr 3575
+VPADDDZ 3576
+VPADDDZrm 3577
+VPADDDZrmb 3578
+VPADDDZrmbk 3579
+VPADDDZrmbkz 3580
+VPADDDZrmk 3581
+VPADDDZrmkz 3582
+VPADDDZrr 3583
+VPADDDZrrk 3584
+VPADDDZrrkz 3585
+VPADDDrm 3586
+VPADDDrr 3587
+VPADDQYrm 3588
+VPADDQYrr 3589
+VPADDQZ 3590
+VPADDQZrm 3591
+VPADDQZrmb 3592
+VPADDQZrmbk 3593
+VPADDQZrmbkz 3594
+VPADDQZrmk 3595
+VPADDQZrmkz 3596
+VPADDQZrr 3597
+VPADDQZrrk 3598
+VPADDQZrrkz 3599
+VPADDQrm 3600
+VPADDQrr 3601
+VPADDSBYrm 3602
+VPADDSBYrr 3603
+VPADDSBZ 3604
+VPADDSBZrm 3605
+VPADDSBZrmk 3606
+VPADDSBZrmkz 3607
+VPADDSBZrr 3608
+VPADDSBZrrk 3609
+VPADDSBZrrkz 3610
+VPADDSBrm 3611
+VPADDSBrr 3612
+VPADDSWYrm 3613
+VPADDSWYrr 3614
+VPADDSWZ 3615
+VPADDSWZrm 3616
+VPADDSWZrmk 3617
+VPADDSWZrmkz 3618
+VPADDSWZrr 3619
+VPADDSWZrrk 3620
+VPADDSWZrrkz 3621
+VPADDSWrm 3622
+VPADDSWrr 3623
+VPADDUSBYrm 3624
+VPADDUSBYrr 3625
+VPADDUSBZ 3626
+VPADDUSBZrm 3627
+VPADDUSBZrmk 3628
+VPADDUSBZrmkz 3629
+VPADDUSBZrr 3630
+VPADDUSBZrrk 3631
+VPADDUSBZrrkz 3632
+VPADDUSBrm 3633
+VPADDUSBrr 3634
+VPADDUSWYrm 3635
+VPADDUSWYrr 3636
+VPADDUSWZ 3637
+VPADDUSWZrm 3638
+VPADDUSWZrmk 3639
+VPADDUSWZrmkz 3640
+VPADDUSWZrr 3641
+VPADDUSWZrrk 3642
+VPADDUSWZrrkz 3643
+VPADDUSWrm 3644
+VPADDUSWrr 3645
+VPADDWYrm 3646
+VPADDWYrr 3647
+VPADDWZ 3648
+VPADDWZrm 3649
+VPADDWZrmk 3650
+VPADDWZrmkz 3651
+VPADDWZrr 3652
+VPADDWZrrk 3653
+VPADDWZrrkz 3654
+VPADDWrm 3655
+VPADDWrr 3656
+VPALIGNRYrmi 3657
+VPALIGNRYrri 3658
+VPALIGNRZ 3659
+VPALIGNRZrmi 3660
+VPALIGNRZrmik 3661
+VPALIGNRZrmikz 3662
+VPALIGNRZrri 3663
+VPALIGNRZrrik 3664
+VPALIGNRZrrikz 3665
+VPALIGNRrmi 3666
+VPALIGNRrri 3667
+VPANDDZ 3668
+VPANDDZrm 3669
+VPANDDZrmb 3670
+VPANDDZrmbk 3671
+VPANDDZrmbkz 3672
+VPANDDZrmk 3673
+VPANDDZrmkz 3674
+VPANDDZrr 3675
+VPANDDZrrk 3676
+VPANDDZrrkz 3677
+VPANDNDZ 3678
+VPANDNDZrm 3679
+VPANDNDZrmb 3680
+VPANDNDZrmbk 3681
+VPANDNDZrmbkz 3682
+VPANDNDZrmk 3683
+VPANDNDZrmkz 3684
+VPANDNDZrr 3685
+VPANDNDZrrk 3686
+VPANDNDZrrkz 3687
+VPANDNQZ 3688
+VPANDNQZrm 3689
+VPANDNQZrmb 3690
+VPANDNQZrmbk 3691
+VPANDNQZrmbkz 3692
+VPANDNQZrmk 3693
+VPANDNQZrmkz 3694
+VPANDNQZrr 3695
+VPANDNQZrrk 3696
+VPANDNQZrrkz 3697
+VPANDNYrm 3698
+VPANDNYrr 3699
+VPANDNrm 3700
+VPANDNrr 3701
+VPANDQZ 3702
+VPANDQZrm 3703
+VPANDQZrmb 3704
+VPANDQZrmbk 3705
+VPANDQZrmbkz 3706
+VPANDQZrmk 3707
+VPANDQZrmkz 3708
+VPANDQZrr 3709
+VPANDQZrrk 3710
+VPANDQZrrkz 3711
+VPANDYrm 3712
+VPANDYrr 3713
+VPANDrm 3714
+VPANDrr 3715
+VPAVGBYrm 3716
+VPAVGBYrr 3717
+VPAVGBZ 3718
+VPAVGBZrm 3719
+VPAVGBZrmk 3720
+VPAVGBZrmkz 3721
+VPAVGBZrr 3722
+VPAVGBZrrk 3723
+VPAVGBZrrkz 3724
+VPAVGBrm 3725
+VPAVGBrr 3726
+VPAVGWYrm 3727
+VPAVGWYrr 3728
+VPAVGWZ 3729
+VPAVGWZrm 3730
+VPAVGWZrmk 3731
+VPAVGWZrmkz 3732
+VPAVGWZrr 3733
+VPAVGWZrrk 3734
+VPAVGWZrrkz 3735
+VPAVGWrm 3736
+VPAVGWrr 3737
+VPBLENDDYrmi 3738
+VPBLENDDYrri 3739
+VPBLENDDrmi 3740
+VPBLENDDrri 3741
+VPBLENDMBZ 3742
+VPBLENDMBZrm 3743
+VPBLENDMBZrmk 3744
+VPBLENDMBZrmkz 3745
+VPBLENDMBZrr 3746
+VPBLENDMBZrrk 3747
+VPBLENDMBZrrkz 3748
+VPBLENDMDZ 3749
+VPBLENDMDZrm 3750
+VPBLENDMDZrmb 3751
+VPBLENDMDZrmbk 3752
+VPBLENDMDZrmbkz 3753
+VPBLENDMDZrmk 3754
+VPBLENDMDZrmkz 3755
+VPBLENDMDZrr 3756
+VPBLENDMDZrrk 3757
+VPBLENDMDZrrkz 3758
+VPBLENDMQZ 3759
+VPBLENDMQZrm 3760
+VPBLENDMQZrmb 3761
+VPBLENDMQZrmbk 3762
+VPBLENDMQZrmbkz 3763
+VPBLENDMQZrmk 3764
+VPBLENDMQZrmkz 3765
+VPBLENDMQZrr 3766
+VPBLENDMQZrrk 3767
+VPBLENDMQZrrkz 3768
+VPBLENDMWZ 3769
+VPBLENDMWZrm 3770
+VPBLENDMWZrmk 3771
+VPBLENDMWZrmkz 3772
+VPBLENDMWZrr 3773
+VPBLENDMWZrrk 3774
+VPBLENDMWZrrkz 3775
+VPBLENDVBYrmr 3776
+VPBLENDVBYrrr 3777
+VPBLENDVBrmr 3778
+VPBLENDVBrrr 3779
+VPBLENDWYrmi 3780
+VPBLENDWYrri 3781
+VPBLENDWrmi 3782
+VPBLENDWrri 3783
+VPBROADCASTBYrm 3784
+VPBROADCASTBYrr 3785
+VPBROADCASTBZ 3786
+VPBROADCASTBZrm 3787
+VPBROADCASTBZrmk 3788
+VPBROADCASTBZrmkz 3789
+VPBROADCASTBZrr 3790
+VPBROADCASTBZrrk 3791
+VPBROADCASTBZrrkz 3792
+VPBROADCASTBrZ 3793
+VPBROADCASTBrZrr 3794
+VPBROADCASTBrZrrk 3795
+VPBROADCASTBrZrrkz 3796
+VPBROADCASTBrm 3797
+VPBROADCASTBrr 3798
+VPBROADCASTDYrm 3799
+VPBROADCASTDYrr 3800
+VPBROADCASTDZ 3801
+VPBROADCASTDZrm 3802
+VPBROADCASTDZrmk 3803
+VPBROADCASTDZrmkz 3804
+VPBROADCASTDZrr 3805
+VPBROADCASTDZrrk 3806
+VPBROADCASTDZrrkz 3807
+VPBROADCASTDrZ 3808
+VPBROADCASTDrZrr 3809
+VPBROADCASTDrZrrk 3810
+VPBROADCASTDrZrrkz 3811
+VPBROADCASTDrm 3812
+VPBROADCASTDrr 3813
+VPBROADCASTMB 3814
+VPBROADCASTMW 3815
+VPBROADCASTQYrm 3816
+VPBROADCASTQYrr 3817
+VPBROADCASTQZ 3818
+VPBROADCASTQZrm 3819
+VPBROADCASTQZrmk 3820
+VPBROADCASTQZrmkz 3821
+VPBROADCASTQZrr 3822
+VPBROADCASTQZrrk 3823
+VPBROADCASTQZrrkz 3824
+VPBROADCASTQrZ 3825
+VPBROADCASTQrZrr 3826
+VPBROADCASTQrZrrk 3827
+VPBROADCASTQrZrrkz 3828
+VPBROADCASTQrm 3829
+VPBROADCASTQrr 3830
+VPBROADCASTWYrm 3831
+VPBROADCASTWYrr 3832
+VPBROADCASTWZ 3833
+VPBROADCASTWZrm 3834
+VPBROADCASTWZrmk 3835
+VPBROADCASTWZrmkz 3836
+VPBROADCASTWZrr 3837
+VPBROADCASTWZrrk 3838
+VPBROADCASTWZrrkz 3839
+VPBROADCASTWrZ 3840
+VPBROADCASTWrZrr 3841
+VPBROADCASTWrZrrk 3842
+VPBROADCASTWrZrrkz 3843
+VPBROADCASTWrm 3844
+VPBROADCASTWrr 3845
+VPCLMULQDQYrmi 3846
+VPCLMULQDQYrri 3847
+VPCLMULQDQZ 3848
+VPCLMULQDQZrmi 3849
+VPCLMULQDQZrri 3850
+VPCLMULQDQrmi 3851
+VPCLMULQDQrri 3852
+VPCMOVYrmr 3853
+VPCMOVYrrm 3854
+VPCMOVYrrr 3855
+VPCMOVYrrr_REV 3856
+VPCMOVrmr 3857
+VPCMOVrrm 3858
+VPCMOVrrr 3859
+VPCMOVrrr_REV 3860
+VPCMPBZ 3861
+VPCMPBZrmi 3862
+VPCMPBZrmik 3863
+VPCMPBZrri 3864
+VPCMPBZrrik 3865
+VPCMPDZ 3866
+VPCMPDZrmbi 3867
+VPCMPDZrmbik 3868
+VPCMPDZrmi 3869
+VPCMPDZrmik 3870
+VPCMPDZrri 3871
+VPCMPDZrrik 3872
+VPCMPEQBYrm 3873
+VPCMPEQBYrr 3874
+VPCMPEQBZ 3875
+VPCMPEQBZrm 3876
+VPCMPEQBZrmk 3877
+VPCMPEQBZrr 3878
+VPCMPEQBZrrk 3879
+VPCMPEQBrm 3880
+VPCMPEQBrr 3881
+VPCMPEQDYrm 3882
+VPCMPEQDYrr 3883
+VPCMPEQDZ 3884
+VPCMPEQDZrm 3885
+VPCMPEQDZrmb 3886
+VPCMPEQDZrmbk 3887
+VPCMPEQDZrmk 3888
+VPCMPEQDZrr 3889
+VPCMPEQDZrrk 3890
+VPCMPEQDrm 3891
+VPCMPEQDrr 3892
+VPCMPEQQYrm 3893
+VPCMPEQQYrr 3894
+VPCMPEQQZ 3895
+VPCMPEQQZrm 3896
+VPCMPEQQZrmb 3897
+VPCMPEQQZrmbk 3898
+VPCMPEQQZrmk 3899
+VPCMPEQQZrr 3900
+VPCMPEQQZrrk 3901
+VPCMPEQQrm 3902
+VPCMPEQQrr 3903
+VPCMPEQWYrm 3904
+VPCMPEQWYrr 3905
+VPCMPEQWZ 3906
+VPCMPEQWZrm 3907
+VPCMPEQWZrmk 3908
+VPCMPEQWZrr 3909
+VPCMPEQWZrrk 3910
+VPCMPEQWrm 3911
+VPCMPEQWrr 3912
+VPCMPESTRIrmi 3913
+VPCMPESTRIrri 3914
+VPCMPESTRMrmi 3915
+VPCMPESTRMrri 3916
+VPCMPGTBYrm 3917
+VPCMPGTBYrr 3918
+VPCMPGTBZ 3919
+VPCMPGTBZrm 3920
+VPCMPGTBZrmk 3921
+VPCMPGTBZrr 3922
+VPCMPGTBZrrk 3923
+VPCMPGTBrm 3924
+VPCMPGTBrr 3925
+VPCMPGTDYrm 3926
+VPCMPGTDYrr 3927
+VPCMPGTDZ 3928
+VPCMPGTDZrm 3929
+VPCMPGTDZrmb 3930
+VPCMPGTDZrmbk 3931
+VPCMPGTDZrmk 3932
+VPCMPGTDZrr 3933
+VPCMPGTDZrrk 3934
+VPCMPGTDrm 3935
+VPCMPGTDrr 3936
+VPCMPGTQYrm 3937
+VPCMPGTQYrr 3938
+VPCMPGTQZ 3939
+VPCMPGTQZrm 3940
+VPCMPGTQZrmb 3941
+VPCMPGTQZrmbk 3942
+VPCMPGTQZrmk 3943
+VPCMPGTQZrr 3944
+VPCMPGTQZrrk 3945
+VPCMPGTQrm 3946
+VPCMPGTQrr 3947
+VPCMPGTWYrm 3948
+VPCMPGTWYrr 3949
+VPCMPGTWZ 3950
+VPCMPGTWZrm 3951
+VPCMPGTWZrmk 3952
+VPCMPGTWZrr 3953
+VPCMPGTWZrrk 3954
+VPCMPGTWrm 3955
+VPCMPGTWrr 3956
+VPCMPISTRIrmi 3957
+VPCMPISTRIrri 3958
+VPCMPISTRMrmi 3959
+VPCMPISTRMrri 3960
+VPCMPQZ 3961
+VPCMPQZrmbi 3962
+VPCMPQZrmbik 3963
+VPCMPQZrmi 3964
+VPCMPQZrmik 3965
+VPCMPQZrri 3966
+VPCMPQZrrik 3967
+VPCMPUBZ 3968
+VPCMPUBZrmi 3969
+VPCMPUBZrmik 3970
+VPCMPUBZrri 3971
+VPCMPUBZrrik 3972
+VPCMPUDZ 3973
+VPCMPUDZrmbi 3974
+VPCMPUDZrmbik 3975
+VPCMPUDZrmi 3976
+VPCMPUDZrmik 3977
+VPCMPUDZrri 3978
+VPCMPUDZrrik 3979
+VPCMPUQZ 3980
+VPCMPUQZrmbi 3981
+VPCMPUQZrmbik 3982
+VPCMPUQZrmi 3983
+VPCMPUQZrmik 3984
+VPCMPUQZrri 3985
+VPCMPUQZrrik 3986
+VPCMPUWZ 3987
+VPCMPUWZrmi 3988
+VPCMPUWZrmik 3989
+VPCMPUWZrri 3990
+VPCMPUWZrrik 3991
+VPCMPWZ 3992
+VPCMPWZrmi 3993
+VPCMPWZrmik 3994
+VPCMPWZrri 3995
+VPCMPWZrrik 3996
+VPCOMBmi 3997
+VPCOMBri 3998
+VPCOMDmi 3999
+VPCOMDri 4000
+VPCOMPRESSBZ 4001
+VPCOMPRESSBZmr 4002
+VPCOMPRESSBZmrk 4003
+VPCOMPRESSBZrr 4004
+VPCOMPRESSBZrrk 4005
+VPCOMPRESSBZrrkz 4006
+VPCOMPRESSDZ 4007
+VPCOMPRESSDZmr 4008
+VPCOMPRESSDZmrk 4009
+VPCOMPRESSDZrr 4010
+VPCOMPRESSDZrrk 4011
+VPCOMPRESSDZrrkz 4012
+VPCOMPRESSQZ 4013
+VPCOMPRESSQZmr 4014
+VPCOMPRESSQZmrk 4015
+VPCOMPRESSQZrr 4016
+VPCOMPRESSQZrrk 4017
+VPCOMPRESSQZrrkz 4018
+VPCOMPRESSWZ 4019
+VPCOMPRESSWZmr 4020
+VPCOMPRESSWZmrk 4021
+VPCOMPRESSWZrr 4022
+VPCOMPRESSWZrrk 4023
+VPCOMPRESSWZrrkz 4024
+VPCOMQmi 4025
+VPCOMQri 4026
+VPCOMUBmi 4027
+VPCOMUBri 4028
+VPCOMUDmi 4029
+VPCOMUDri 4030
+VPCOMUQmi 4031
+VPCOMUQri 4032
+VPCOMUWmi 4033
+VPCOMUWri 4034
+VPCOMWmi 4035
+VPCOMWri 4036
+VPCONFLICTDZ 4037
+VPCONFLICTDZrm 4038
+VPCONFLICTDZrmb 4039
+VPCONFLICTDZrmbk 4040
+VPCONFLICTDZrmbkz 4041
+VPCONFLICTDZrmk 4042
+VPCONFLICTDZrmkz 4043
+VPCONFLICTDZrr 4044
+VPCONFLICTDZrrk 4045
+VPCONFLICTDZrrkz 4046
+VPCONFLICTQZ 4047
+VPCONFLICTQZrm 4048
+VPCONFLICTQZrmb 4049
+VPCONFLICTQZrmbk 4050
+VPCONFLICTQZrmbkz 4051
+VPCONFLICTQZrmk 4052
+VPCONFLICTQZrmkz 4053
+VPCONFLICTQZrr 4054
+VPCONFLICTQZrrk 4055
+VPCONFLICTQZrrkz 4056
+VPDPBSSDSYrm 4057
+VPDPBSSDSYrr 4058
+VPDPBSSDSZ 4059
+VPDPBSSDSZrm 4060
+VPDPBSSDSZrmb 4061
+VPDPBSSDSZrmbk 4062
+VPDPBSSDSZrmbkz 4063
+VPDPBSSDSZrmk 4064
+VPDPBSSDSZrmkz 4065
+VPDPBSSDSZrr 4066
+VPDPBSSDSZrrk 4067
+VPDPBSSDSZrrkz 4068
+VPDPBSSDSrm 4069
+VPDPBSSDSrr 4070
+VPDPBSSDYrm 4071
+VPDPBSSDYrr 4072
+VPDPBSSDZ 4073
+VPDPBSSDZrm 4074
+VPDPBSSDZrmb 4075
+VPDPBSSDZrmbk 4076
+VPDPBSSDZrmbkz 4077
+VPDPBSSDZrmk 4078
+VPDPBSSDZrmkz 4079
+VPDPBSSDZrr 4080
+VPDPBSSDZrrk 4081
+VPDPBSSDZrrkz 4082
+VPDPBSSDrm 4083
+VPDPBSSDrr 4084
+VPDPBSUDSYrm 4085
+VPDPBSUDSYrr 4086
+VPDPBSUDSZ 4087
+VPDPBSUDSZrm 4088
+VPDPBSUDSZrmb 4089
+VPDPBSUDSZrmbk 4090
+VPDPBSUDSZrmbkz 4091
+VPDPBSUDSZrmk 4092
+VPDPBSUDSZrmkz 4093
+VPDPBSUDSZrr 4094
+VPDPBSUDSZrrk 4095
+VPDPBSUDSZrrkz 4096
+VPDPBSUDSrm 4097
+VPDPBSUDSrr 4098
+VPDPBSUDYrm 4099
+VPDPBSUDYrr 4100
+VPDPBSUDZ 4101
+VPDPBSUDZrm 4102
+VPDPBSUDZrmb 4103
+VPDPBSUDZrmbk 4104
+VPDPBSUDZrmbkz 4105
+VPDPBSUDZrmk 4106
+VPDPBSUDZrmkz 4107
+VPDPBSUDZrr 4108
+VPDPBSUDZrrk 4109
+VPDPBSUDZrrkz 4110
+VPDPBSUDrm 4111
+VPDPBSUDrr 4112
+VPDPBUSDSYrm 4113
+VPDPBUSDSYrr 4114
+VPDPBUSDSZ 4115
+VPDPBUSDSZrm 4116
+VPDPBUSDSZrmb 4117
+VPDPBUSDSZrmbk 4118
+VPDPBUSDSZrmbkz 4119
+VPDPBUSDSZrmk 4120
+VPDPBUSDSZrmkz 4121
+VPDPBUSDSZrr 4122
+VPDPBUSDSZrrk 4123
+VPDPBUSDSZrrkz 4124
+VPDPBUSDSrm 4125
+VPDPBUSDSrr 4126
+VPDPBUSDYrm 4127
+VPDPBUSDYrr 4128
+VPDPBUSDZ 4129
+VPDPBUSDZrm 4130
+VPDPBUSDZrmb 4131
+VPDPBUSDZrmbk 4132
+VPDPBUSDZrmbkz 4133
+VPDPBUSDZrmk 4134
+VPDPBUSDZrmkz 4135
+VPDPBUSDZrr 4136
+VPDPBUSDZrrk 4137
+VPDPBUSDZrrkz 4138
+VPDPBUSDrm 4139
+VPDPBUSDrr 4140
+VPDPBUUDSYrm 4141
+VPDPBUUDSYrr 4142
+VPDPBUUDSZ 4143
+VPDPBUUDSZrm 4144
+VPDPBUUDSZrmb 4145
+VPDPBUUDSZrmbk 4146
+VPDPBUUDSZrmbkz 4147
+VPDPBUUDSZrmk 4148
+VPDPBUUDSZrmkz 4149
+VPDPBUUDSZrr 4150
+VPDPBUUDSZrrk 4151
+VPDPBUUDSZrrkz 4152
+VPDPBUUDSrm 4153
+VPDPBUUDSrr 4154
+VPDPBUUDYrm 4155
+VPDPBUUDYrr 4156
+VPDPBUUDZ 4157
+VPDPBUUDZrm 4158
+VPDPBUUDZrmb 4159
+VPDPBUUDZrmbk 4160
+VPDPBUUDZrmbkz 4161
+VPDPBUUDZrmk 4162
+VPDPBUUDZrmkz 4163
+VPDPBUUDZrr 4164
+VPDPBUUDZrrk 4165
+VPDPBUUDZrrkz 4166
+VPDPBUUDrm 4167
+VPDPBUUDrr 4168
+VPDPWSSDSYrm 4169
+VPDPWSSDSYrr 4170
+VPDPWSSDSZ 4171
+VPDPWSSDSZrm 4172
+VPDPWSSDSZrmb 4173
+VPDPWSSDSZrmbk 4174
+VPDPWSSDSZrmbkz 4175
+VPDPWSSDSZrmk 4176
+VPDPWSSDSZrmkz 4177
+VPDPWSSDSZrr 4178
+VPDPWSSDSZrrk 4179
+VPDPWSSDSZrrkz 4180
+VPDPWSSDSrm 4181
+VPDPWSSDSrr 4182
+VPDPWSSDYrm 4183
+VPDPWSSDYrr 4184
+VPDPWSSDZ 4185
+VPDPWSSDZrm 4186
+VPDPWSSDZrmb 4187
+VPDPWSSDZrmbk 4188
+VPDPWSSDZrmbkz 4189
+VPDPWSSDZrmk 4190
+VPDPWSSDZrmkz 4191
+VPDPWSSDZrr 4192
+VPDPWSSDZrrk 4193
+VPDPWSSDZrrkz 4194
+VPDPWSSDrm 4195
+VPDPWSSDrr 4196
+VPDPWSUDSYrm 4197
+VPDPWSUDSYrr 4198
+VPDPWSUDSZ 4199
+VPDPWSUDSZrm 4200
+VPDPWSUDSZrmb 4201
+VPDPWSUDSZrmbk 4202
+VPDPWSUDSZrmbkz 4203
+VPDPWSUDSZrmk 4204
+VPDPWSUDSZrmkz 4205
+VPDPWSUDSZrr 4206
+VPDPWSUDSZrrk 4207
+VPDPWSUDSZrrkz 4208
+VPDPWSUDSrm 4209
+VPDPWSUDSrr 4210
+VPDPWSUDYrm 4211
+VPDPWSUDYrr 4212
+VPDPWSUDZ 4213
+VPDPWSUDZrm 4214
+VPDPWSUDZrmb 4215
+VPDPWSUDZrmbk 4216
+VPDPWSUDZrmbkz 4217
+VPDPWSUDZrmk 4218
+VPDPWSUDZrmkz 4219
+VPDPWSUDZrr 4220
+VPDPWSUDZrrk 4221
+VPDPWSUDZrrkz 4222
+VPDPWSUDrm 4223
+VPDPWSUDrr 4224
+VPDPWUSDSYrm 4225
+VPDPWUSDSYrr 4226
+VPDPWUSDSZ 4227
+VPDPWUSDSZrm 4228
+VPDPWUSDSZrmb 4229
+VPDPWUSDSZrmbk 4230
+VPDPWUSDSZrmbkz 4231
+VPDPWUSDSZrmk 4232
+VPDPWUSDSZrmkz 4233
+VPDPWUSDSZrr 4234
+VPDPWUSDSZrrk 4235
+VPDPWUSDSZrrkz 4236
+VPDPWUSDSrm 4237
+VPDPWUSDSrr 4238
+VPDPWUSDYrm 4239
+VPDPWUSDYrr 4240
+VPDPWUSDZ 4241
+VPDPWUSDZrm 4242
+VPDPWUSDZrmb 4243
+VPDPWUSDZrmbk 4244
+VPDPWUSDZrmbkz 4245
+VPDPWUSDZrmk 4246
+VPDPWUSDZrmkz 4247
+VPDPWUSDZrr 4248
+VPDPWUSDZrrk 4249
+VPDPWUSDZrrkz 4250
+VPDPWUSDrm 4251
+VPDPWUSDrr 4252
+VPDPWUUDSYrm 4253
+VPDPWUUDSYrr 4254
+VPDPWUUDSZ 4255
+VPDPWUUDSZrm 4256
+VPDPWUUDSZrmb 4257
+VPDPWUUDSZrmbk 4258
+VPDPWUUDSZrmbkz 4259
+VPDPWUUDSZrmk 4260
+VPDPWUUDSZrmkz 4261
+VPDPWUUDSZrr 4262
+VPDPWUUDSZrrk 4263
+VPDPWUUDSZrrkz 4264
+VPDPWUUDSrm 4265
+VPDPWUUDSrr 4266
+VPDPWUUDYrm 4267
+VPDPWUUDYrr 4268
+VPDPWUUDZ 4269
+VPDPWUUDZrm 4270
+VPDPWUUDZrmb 4271
+VPDPWUUDZrmbk 4272
+VPDPWUUDZrmbkz 4273
+VPDPWUUDZrmk 4274
+VPDPWUUDZrmkz 4275
+VPDPWUUDZrr 4276
+VPDPWUUDZrrk 4277
+VPDPWUUDZrrkz 4278
+VPDPWUUDrm 4279
+VPDPWUUDrr 4280
+VPERM 4281
+VPERMBZ 4282
+VPERMBZrm 4283
+VPERMBZrmk 4284
+VPERMBZrmkz 4285
+VPERMBZrr 4286
+VPERMBZrrk 4287
+VPERMBZrrkz 4288
+VPERMDYrm 4289
+VPERMDYrr 4290
+VPERMDZ 4291
+VPERMDZrm 4292
+VPERMDZrmb 4293
+VPERMDZrmbk 4294
+VPERMDZrmbkz 4295
+VPERMDZrmk 4296
+VPERMDZrmkz 4297
+VPERMDZrr 4298
+VPERMDZrrk 4299
+VPERMDZrrkz 4300
+VPERMI 4301
+VPERMIL 4302
+VPERMILPDYmi 4303
+VPERMILPDYri 4304
+VPERMILPDYrm 4305
+VPERMILPDYrr 4306
+VPERMILPDZ 4307
+VPERMILPDZmbi 4308
+VPERMILPDZmbik 4309
+VPERMILPDZmbikz 4310
+VPERMILPDZmi 4311
+VPERMILPDZmik 4312
+VPERMILPDZmikz 4313
+VPERMILPDZri 4314
+VPERMILPDZrik 4315
+VPERMILPDZrikz 4316
+VPERMILPDZrm 4317
+VPERMILPDZrmb 4318
+VPERMILPDZrmbk 4319
+VPERMILPDZrmbkz 4320
+VPERMILPDZrmk 4321
+VPERMILPDZrmkz 4322
+VPERMILPDZrr 4323
+VPERMILPDZrrk 4324
+VPERMILPDZrrkz 4325
+VPERMILPDmi 4326
+VPERMILPDri 4327
+VPERMILPDrm 4328
+VPERMILPDrr 4329
+VPERMILPSYmi 4330
+VPERMILPSYri 4331
+VPERMILPSYrm 4332
+VPERMILPSYrr 4333
+VPERMILPSZ 4334
+VPERMILPSZmbi 4335
+VPERMILPSZmbik 4336
+VPERMILPSZmbikz 4337
+VPERMILPSZmi 4338
+VPERMILPSZmik 4339
+VPERMILPSZmikz 4340
+VPERMILPSZri 4341
+VPERMILPSZrik 4342
+VPERMILPSZrikz 4343
+VPERMILPSZrm 4344
+VPERMILPSZrmb 4345
+VPERMILPSZrmbk 4346
+VPERMILPSZrmbkz 4347
+VPERMILPSZrmk 4348
+VPERMILPSZrmkz 4349
+VPERMILPSZrr 4350
+VPERMILPSZrrk 4351
+VPERMILPSZrrkz 4352
+VPERMILPSmi 4353
+VPERMILPSri 4354
+VPERMILPSrm 4355
+VPERMILPSrr 4356
+VPERMPDYmi 4357
+VPERMPDYri 4358
+VPERMPDZ 4359
+VPERMPDZmbi 4360
+VPERMPDZmbik 4361
+VPERMPDZmbikz 4362
+VPERMPDZmi 4363
+VPERMPDZmik 4364
+VPERMPDZmikz 4365
+VPERMPDZri 4366
+VPERMPDZrik 4367
+VPERMPDZrikz 4368
+VPERMPDZrm 4369
+VPERMPDZrmb 4370
+VPERMPDZrmbk 4371
+VPERMPDZrmbkz 4372
+VPERMPDZrmk 4373
+VPERMPDZrmkz 4374
+VPERMPDZrr 4375
+VPERMPDZrrk 4376
+VPERMPDZrrkz 4377
+VPERMPSYrm 4378
+VPERMPSYrr 4379
+VPERMPSZ 4380
+VPERMPSZrm 4381
+VPERMPSZrmb 4382
+VPERMPSZrmbk 4383
+VPERMPSZrmbkz 4384
+VPERMPSZrmk 4385
+VPERMPSZrmkz 4386
+VPERMPSZrr 4387
+VPERMPSZrrk 4388
+VPERMPSZrrkz 4389
+VPERMQYmi 4390
+VPERMQYri 4391
+VPERMQZ 4392
+VPERMQZmbi 4393
+VPERMQZmbik 4394
+VPERMQZmbikz 4395
+VPERMQZmi 4396
+VPERMQZmik 4397
+VPERMQZmikz 4398
+VPERMQZri 4399
+VPERMQZrik 4400
+VPERMQZrikz 4401
+VPERMQZrm 4402
+VPERMQZrmb 4403
+VPERMQZrmbk 4404
+VPERMQZrmbkz 4405
+VPERMQZrmk 4406
+VPERMQZrmkz 4407
+VPERMQZrr 4408
+VPERMQZrrk 4409
+VPERMQZrrkz 4410
+VPERMT 4411
+VPERMWZ 4412
+VPERMWZrm 4413
+VPERMWZrmk 4414
+VPERMWZrmkz 4415
+VPERMWZrr 4416
+VPERMWZrrk 4417
+VPERMWZrrkz 4418
+VPEXPANDBZ 4419
+VPEXPANDBZrm 4420
+VPEXPANDBZrmk 4421
+VPEXPANDBZrmkz 4422
+VPEXPANDBZrr 4423
+VPEXPANDBZrrk 4424
+VPEXPANDBZrrkz 4425
+VPEXPANDDZ 4426
+VPEXPANDDZrm 4427
+VPEXPANDDZrmk 4428
+VPEXPANDDZrmkz 4429
+VPEXPANDDZrr 4430
+VPEXPANDDZrrk 4431
+VPEXPANDDZrrkz 4432
+VPEXPANDQZ 4433
+VPEXPANDQZrm 4434
+VPEXPANDQZrmk 4435
+VPEXPANDQZrmkz 4436
+VPEXPANDQZrr 4437
+VPEXPANDQZrrk 4438
+VPEXPANDQZrrkz 4439
+VPEXPANDWZ 4440
+VPEXPANDWZrm 4441
+VPEXPANDWZrmk 4442
+VPEXPANDWZrmkz 4443
+VPEXPANDWZrr 4444
+VPEXPANDWZrrk 4445
+VPEXPANDWZrrkz 4446
+VPEXTRBZmri 4447
+VPEXTRBZrri 4448
+VPEXTRBmri 4449
+VPEXTRBrri 4450
+VPEXTRDZmri 4451
+VPEXTRDZrri 4452
+VPEXTRDmri 4453
+VPEXTRDrri 4454
+VPEXTRQZmri 4455
+VPEXTRQZrri 4456
+VPEXTRQmri 4457
+VPEXTRQrri 4458
+VPEXTRWZmri 4459
+VPEXTRWZrri 4460
+VPEXTRWZrri_REV 4461
+VPEXTRWmri 4462
+VPEXTRWrri 4463
+VPEXTRWrri_REV 4464
+VPGATHERDDYrm 4465
+VPGATHERDDZ 4466
+VPGATHERDDZrm 4467
+VPGATHERDDrm 4468
+VPGATHERDQYrm 4469
+VPGATHERDQZ 4470
+VPGATHERDQZrm 4471
+VPGATHERDQrm 4472
+VPGATHERQDYrm 4473
+VPGATHERQDZ 4474
+VPGATHERQDZrm 4475
+VPGATHERQDrm 4476
+VPGATHERQQYrm 4477
+VPGATHERQQZ 4478
+VPGATHERQQZrm 4479
+VPGATHERQQrm 4480
+VPHADDBDrm 4481
+VPHADDBDrr 4482
+VPHADDBQrm 4483
+VPHADDBQrr 4484
+VPHADDBWrm 4485
+VPHADDBWrr 4486
+VPHADDDQrm 4487
+VPHADDDQrr 4488
+VPHADDDYrm 4489
+VPHADDDYrr 4490
+VPHADDDrm 4491
+VPHADDDrr 4492
+VPHADDSWYrm 4493
+VPHADDSWYrr 4494
+VPHADDSWrm 4495
+VPHADDSWrr 4496
+VPHADDUBDrm 4497
+VPHADDUBDrr 4498
+VPHADDUBQrm 4499
+VPHADDUBQrr 4500
+VPHADDUBWrm 4501
+VPHADDUBWrr 4502
+VPHADDUDQrm 4503
+VPHADDUDQrr 4504
+VPHADDUWDrm 4505
+VPHADDUWDrr 4506
+VPHADDUWQrm 4507
+VPHADDUWQrr 4508
+VPHADDWDrm 4509
+VPHADDWDrr 4510
+VPHADDWQrm 4511
+VPHADDWQrr 4512
+VPHADDWYrm 4513
+VPHADDWYrr 4514
+VPHADDWrm 4515
+VPHADDWrr 4516
+VPHMINPOSUWrm 4517
+VPHMINPOSUWrr 4518
+VPHSUBBWrm 4519
+VPHSUBBWrr 4520
+VPHSUBDQrm 4521
+VPHSUBDQrr 4522
+VPHSUBDYrm 4523
+VPHSUBDYrr 4524
+VPHSUBDrm 4525
+VPHSUBDrr 4526
+VPHSUBSWYrm 4527
+VPHSUBSWYrr 4528
+VPHSUBSWrm 4529
+VPHSUBSWrr 4530
+VPHSUBWDrm 4531
+VPHSUBWDrr 4532
+VPHSUBWYrm 4533
+VPHSUBWYrr 4534
+VPHSUBWrm 4535
+VPHSUBWrr 4536
+VPINSRBZrmi 4537
+VPINSRBZrri 4538
+VPINSRBrmi 4539
+VPINSRBrri 4540
+VPINSRDZrmi 4541
+VPINSRDZrri 4542
+VPINSRDrmi 4543
+VPINSRDrri 4544
+VPINSRQZrmi 4545
+VPINSRQZrri 4546
+VPINSRQrmi 4547
+VPINSRQrri 4548
+VPINSRWZrmi 4549
+VPINSRWZrri 4550
+VPINSRWrmi 4551
+VPINSRWrri 4552
+VPLZCNTDZ 4553
+VPLZCNTDZrm 4554
+VPLZCNTDZrmb 4555
+VPLZCNTDZrmbk 4556
+VPLZCNTDZrmbkz 4557
+VPLZCNTDZrmk 4558
+VPLZCNTDZrmkz 4559
+VPLZCNTDZrr 4560
+VPLZCNTDZrrk 4561
+VPLZCNTDZrrkz 4562
+VPLZCNTQZ 4563
+VPLZCNTQZrm 4564
+VPLZCNTQZrmb 4565
+VPLZCNTQZrmbk 4566
+VPLZCNTQZrmbkz 4567
+VPLZCNTQZrmk 4568
+VPLZCNTQZrmkz 4569
+VPLZCNTQZrr 4570
+VPLZCNTQZrrk 4571
+VPLZCNTQZrrkz 4572
+VPMACSDDrm 4573
+VPMACSDDrr 4574
+VPMACSDQHrm 4575
+VPMACSDQHrr 4576
+VPMACSDQLrm 4577
+VPMACSDQLrr 4578
+VPMACSSDDrm 4579
+VPMACSSDDrr 4580
+VPMACSSDQHrm 4581
+VPMACSSDQHrr 4582
+VPMACSSDQLrm 4583
+VPMACSSDQLrr 4584
+VPMACSSWDrm 4585
+VPMACSSWDrr 4586
+VPMACSSWWrm 4587
+VPMACSSWWrr 4588
+VPMACSWDrm 4589
+VPMACSWDrr 4590
+VPMACSWWrm 4591
+VPMACSWWrr 4592
+VPMADCSSWDrm 4593
+VPMADCSSWDrr 4594
+VPMADCSWDrm 4595
+VPMADCSWDrr 4596
+VPMADD 4597
+VPMADDUBSWYrm 4598
+VPMADDUBSWYrr 4599
+VPMADDUBSWZ 4600
+VPMADDUBSWZrm 4601
+VPMADDUBSWZrmk 4602
+VPMADDUBSWZrmkz 4603
+VPMADDUBSWZrr 4604
+VPMADDUBSWZrrk 4605
+VPMADDUBSWZrrkz 4606
+VPMADDUBSWrm 4607
+VPMADDUBSWrr 4608
+VPMADDWDYrm 4609
+VPMADDWDYrr 4610
+VPMADDWDZ 4611
+VPMADDWDZrm 4612
+VPMADDWDZrmk 4613
+VPMADDWDZrmkz 4614
+VPMADDWDZrr 4615
+VPMADDWDZrrk 4616
+VPMADDWDZrrkz 4617
+VPMADDWDrm 4618
+VPMADDWDrr 4619
+VPMASKMOVDYmr 4620
+VPMASKMOVDYrm 4621
+VPMASKMOVDmr 4622
+VPMASKMOVDrm 4623
+VPMASKMOVQYmr 4624
+VPMASKMOVQYrm 4625
+VPMASKMOVQmr 4626
+VPMASKMOVQrm 4627
+VPMAXSBYrm 4628
+VPMAXSBYrr 4629
+VPMAXSBZ 4630
+VPMAXSBZrm 4631
+VPMAXSBZrmk 4632
+VPMAXSBZrmkz 4633
+VPMAXSBZrr 4634
+VPMAXSBZrrk 4635
+VPMAXSBZrrkz 4636
+VPMAXSBrm 4637
+VPMAXSBrr 4638
+VPMAXSDYrm 4639
+VPMAXSDYrr 4640
+VPMAXSDZ 4641
+VPMAXSDZrm 4642
+VPMAXSDZrmb 4643
+VPMAXSDZrmbk 4644
+VPMAXSDZrmbkz 4645
+VPMAXSDZrmk 4646
+VPMAXSDZrmkz 4647
+VPMAXSDZrr 4648
+VPMAXSDZrrk 4649
+VPMAXSDZrrkz 4650
+VPMAXSDrm 4651
+VPMAXSDrr 4652
+VPMAXSQZ 4653
+VPMAXSQZrm 4654
+VPMAXSQZrmb 4655
+VPMAXSQZrmbk 4656
+VPMAXSQZrmbkz 4657
+VPMAXSQZrmk 4658
+VPMAXSQZrmkz 4659
+VPMAXSQZrr 4660
+VPMAXSQZrrk 4661
+VPMAXSQZrrkz 4662
+VPMAXSWYrm 4663
+VPMAXSWYrr 4664
+VPMAXSWZ 4665
+VPMAXSWZrm 4666
+VPMAXSWZrmk 4667
+VPMAXSWZrmkz 4668
+VPMAXSWZrr 4669
+VPMAXSWZrrk 4670
+VPMAXSWZrrkz 4671
+VPMAXSWrm 4672
+VPMAXSWrr 4673
+VPMAXUBYrm 4674
+VPMAXUBYrr 4675
+VPMAXUBZ 4676
+VPMAXUBZrm 4677
+VPMAXUBZrmk 4678
+VPMAXUBZrmkz 4679
+VPMAXUBZrr 4680
+VPMAXUBZrrk 4681
+VPMAXUBZrrkz 4682
+VPMAXUBrm 4683
+VPMAXUBrr 4684
+VPMAXUDYrm 4685
+VPMAXUDYrr 4686
+VPMAXUDZ 4687
+VPMAXUDZrm 4688
+VPMAXUDZrmb 4689
+VPMAXUDZrmbk 4690
+VPMAXUDZrmbkz 4691
+VPMAXUDZrmk 4692
+VPMAXUDZrmkz 4693
+VPMAXUDZrr 4694
+VPMAXUDZrrk 4695
+VPMAXUDZrrkz 4696
+VPMAXUDrm 4697
+VPMAXUDrr 4698
+VPMAXUQZ 4699
+VPMAXUQZrm 4700
+VPMAXUQZrmb 4701
+VPMAXUQZrmbk 4702
+VPMAXUQZrmbkz 4703
+VPMAXUQZrmk 4704
+VPMAXUQZrmkz 4705
+VPMAXUQZrr 4706
+VPMAXUQZrrk 4707
+VPMAXUQZrrkz 4708
+VPMAXUWYrm 4709
+VPMAXUWYrr 4710
+VPMAXUWZ 4711
+VPMAXUWZrm 4712
+VPMAXUWZrmk 4713
+VPMAXUWZrmkz 4714
+VPMAXUWZrr 4715
+VPMAXUWZrrk 4716
+VPMAXUWZrrkz 4717
+VPMAXUWrm 4718
+VPMAXUWrr 4719
+VPMINSBYrm 4720
+VPMINSBYrr 4721
+VPMINSBZ 4722
+VPMINSBZrm 4723
+VPMINSBZrmk 4724
+VPMINSBZrmkz 4725
+VPMINSBZrr 4726
+VPMINSBZrrk 4727
+VPMINSBZrrkz 4728
+VPMINSBrm 4729
+VPMINSBrr 4730
+VPMINSDYrm 4731
+VPMINSDYrr 4732
+VPMINSDZ 4733
+VPMINSDZrm 4734
+VPMINSDZrmb 4735
+VPMINSDZrmbk 4736
+VPMINSDZrmbkz 4737
+VPMINSDZrmk 4738
+VPMINSDZrmkz 4739
+VPMINSDZrr 4740
+VPMINSDZrrk 4741
+VPMINSDZrrkz 4742
+VPMINSDrm 4743
+VPMINSDrr 4744
+VPMINSQZ 4745
+VPMINSQZrm 4746
+VPMINSQZrmb 4747
+VPMINSQZrmbk 4748
+VPMINSQZrmbkz 4749
+VPMINSQZrmk 4750
+VPMINSQZrmkz 4751
+VPMINSQZrr 4752
+VPMINSQZrrk 4753
+VPMINSQZrrkz 4754
+VPMINSWYrm 4755
+VPMINSWYrr 4756
+VPMINSWZ 4757
+VPMINSWZrm 4758
+VPMINSWZrmk 4759
+VPMINSWZrmkz 4760
+VPMINSWZrr 4761
+VPMINSWZrrk 4762
+VPMINSWZrrkz 4763
+VPMINSWrm 4764
+VPMINSWrr 4765
+VPMINUBYrm 4766
+VPMINUBYrr 4767
+VPMINUBZ 4768
+VPMINUBZrm 4769
+VPMINUBZrmk 4770
+VPMINUBZrmkz 4771
+VPMINUBZrr 4772
+VPMINUBZrrk 4773
+VPMINUBZrrkz 4774
+VPMINUBrm 4775
+VPMINUBrr 4776
+VPMINUDYrm 4777
+VPMINUDYrr 4778
+VPMINUDZ 4779
+VPMINUDZrm 4780
+VPMINUDZrmb 4781
+VPMINUDZrmbk 4782
+VPMINUDZrmbkz 4783
+VPMINUDZrmk 4784
+VPMINUDZrmkz 4785
+VPMINUDZrr 4786
+VPMINUDZrrk 4787
+VPMINUDZrrkz 4788
+VPMINUDrm 4789
+VPMINUDrr 4790
+VPMINUQZ 4791
+VPMINUQZrm 4792
+VPMINUQZrmb 4793
+VPMINUQZrmbk 4794
+VPMINUQZrmbkz 4795
+VPMINUQZrmk 4796
+VPMINUQZrmkz 4797
+VPMINUQZrr 4798
+VPMINUQZrrk 4799
+VPMINUQZrrkz 4800
+VPMINUWYrm 4801
+VPMINUWYrr 4802
+VPMINUWZ 4803
+VPMINUWZrm 4804
+VPMINUWZrmk 4805
+VPMINUWZrmkz 4806
+VPMINUWZrr 4807
+VPMINUWZrrk 4808
+VPMINUWZrrkz 4809
+VPMINUWrm 4810
+VPMINUWrr 4811
+VPMOVB 4812
+VPMOVD 4813
+VPMOVDBZ 4814
+VPMOVDBZmr 4815
+VPMOVDBZmrk 4816
+VPMOVDBZrr 4817
+VPMOVDBZrrk 4818
+VPMOVDBZrrkz 4819
+VPMOVDWZ 4820
+VPMOVDWZmr 4821
+VPMOVDWZmrk 4822
+VPMOVDWZrr 4823
+VPMOVDWZrrk 4824
+VPMOVDWZrrkz 4825
+VPMOVM 4826
+VPMOVMSKBYrr 4827
+VPMOVMSKBrr 4828
+VPMOVQ 4829
+VPMOVQBZ 4830
+VPMOVQBZmr 4831
+VPMOVQBZmrk 4832
+VPMOVQBZrr 4833
+VPMOVQBZrrk 4834
+VPMOVQBZrrkz 4835
+VPMOVQDZ 4836
+VPMOVQDZmr 4837
+VPMOVQDZmrk 4838
+VPMOVQDZrr 4839
+VPMOVQDZrrk 4840
+VPMOVQDZrrkz 4841
+VPMOVQWZ 4842
+VPMOVQWZmr 4843
+VPMOVQWZmrk 4844
+VPMOVQWZrr 4845
+VPMOVQWZrrk 4846
+VPMOVQWZrrkz 4847
+VPMOVSDBZ 4848
+VPMOVSDBZmr 4849
+VPMOVSDBZmrk 4850
+VPMOVSDBZrr 4851
+VPMOVSDBZrrk 4852
+VPMOVSDBZrrkz 4853
+VPMOVSDWZ 4854
+VPMOVSDWZmr 4855
+VPMOVSDWZmrk 4856
+VPMOVSDWZrr 4857
+VPMOVSDWZrrk 4858
+VPMOVSDWZrrkz 4859
+VPMOVSQBZ 4860
+VPMOVSQBZmr 4861
+VPMOVSQBZmrk 4862
+VPMOVSQBZrr 4863
+VPMOVSQBZrrk 4864
+VPMOVSQBZrrkz 4865
+VPMOVSQDZ 4866
+VPMOVSQDZmr 4867
+VPMOVSQDZmrk 4868
+VPMOVSQDZrr 4869
+VPMOVSQDZrrk 4870
+VPMOVSQDZrrkz 4871
+VPMOVSQWZ 4872
+VPMOVSQWZmr 4873
+VPMOVSQWZmrk 4874
+VPMOVSQWZrr 4875
+VPMOVSQWZrrk 4876
+VPMOVSQWZrrkz 4877
+VPMOVSWBZ 4878
+VPMOVSWBZmr 4879
+VPMOVSWBZmrk 4880
+VPMOVSWBZrr 4881
+VPMOVSWBZrrk 4882
+VPMOVSWBZrrkz 4883
+VPMOVSXBDYrm 4884
+VPMOVSXBDYrr 4885
+VPMOVSXBDZ 4886
+VPMOVSXBDZrm 4887
+VPMOVSXBDZrmk 4888
+VPMOVSXBDZrmkz 4889
+VPMOVSXBDZrr 4890
+VPMOVSXBDZrrk 4891
+VPMOVSXBDZrrkz 4892
+VPMOVSXBDrm 4893
+VPMOVSXBDrr 4894
+VPMOVSXBQYrm 4895
+VPMOVSXBQYrr 4896
+VPMOVSXBQZ 4897
+VPMOVSXBQZrm 4898
+VPMOVSXBQZrmk 4899
+VPMOVSXBQZrmkz 4900
+VPMOVSXBQZrr 4901
+VPMOVSXBQZrrk 4902
+VPMOVSXBQZrrkz 4903
+VPMOVSXBQrm 4904
+VPMOVSXBQrr 4905
+VPMOVSXBWYrm 4906
+VPMOVSXBWYrr 4907
+VPMOVSXBWZ 4908
+VPMOVSXBWZrm 4909
+VPMOVSXBWZrmk 4910
+VPMOVSXBWZrmkz 4911
+VPMOVSXBWZrr 4912
+VPMOVSXBWZrrk 4913
+VPMOVSXBWZrrkz 4914
+VPMOVSXBWrm 4915
+VPMOVSXBWrr 4916
+VPMOVSXDQYrm 4917
+VPMOVSXDQYrr 4918
+VPMOVSXDQZ 4919
+VPMOVSXDQZrm 4920
+VPMOVSXDQZrmk 4921
+VPMOVSXDQZrmkz 4922
+VPMOVSXDQZrr 4923
+VPMOVSXDQZrrk 4924
+VPMOVSXDQZrrkz 4925
+VPMOVSXDQrm 4926
+VPMOVSXDQrr 4927
+VPMOVSXWDYrm 4928
+VPMOVSXWDYrr 4929
+VPMOVSXWDZ 4930
+VPMOVSXWDZrm 4931
+VPMOVSXWDZrmk 4932
+VPMOVSXWDZrmkz 4933
+VPMOVSXWDZrr 4934
+VPMOVSXWDZrrk 4935
+VPMOVSXWDZrrkz 4936
+VPMOVSXWDrm 4937
+VPMOVSXWDrr 4938
+VPMOVSXWQYrm 4939
+VPMOVSXWQYrr 4940
+VPMOVSXWQZ 4941
+VPMOVSXWQZrm 4942
+VPMOVSXWQZrmk 4943
+VPMOVSXWQZrmkz 4944
+VPMOVSXWQZrr 4945
+VPMOVSXWQZrrk 4946
+VPMOVSXWQZrrkz 4947
+VPMOVSXWQrm 4948
+VPMOVSXWQrr 4949
+VPMOVUSDBZ 4950
+VPMOVUSDBZmr 4951
+VPMOVUSDBZmrk 4952
+VPMOVUSDBZrr 4953
+VPMOVUSDBZrrk 4954
+VPMOVUSDBZrrkz 4955
+VPMOVUSDWZ 4956
+VPMOVUSDWZmr 4957
+VPMOVUSDWZmrk 4958
+VPMOVUSDWZrr 4959
+VPMOVUSDWZrrk 4960
+VPMOVUSDWZrrkz 4961
+VPMOVUSQBZ 4962
+VPMOVUSQBZmr 4963
+VPMOVUSQBZmrk 4964
+VPMOVUSQBZrr 4965
+VPMOVUSQBZrrk 4966
+VPMOVUSQBZrrkz 4967
+VPMOVUSQDZ 4968
+VPMOVUSQDZmr 4969
+VPMOVUSQDZmrk 4970
+VPMOVUSQDZrr 4971
+VPMOVUSQDZrrk 4972
+VPMOVUSQDZrrkz 4973
+VPMOVUSQWZ 4974
+VPMOVUSQWZmr 4975
+VPMOVUSQWZmrk 4976
+VPMOVUSQWZrr 4977
+VPMOVUSQWZrrk 4978
+VPMOVUSQWZrrkz 4979
+VPMOVUSWBZ 4980
+VPMOVUSWBZmr 4981
+VPMOVUSWBZmrk 4982
+VPMOVUSWBZrr 4983
+VPMOVUSWBZrrk 4984
+VPMOVUSWBZrrkz 4985
+VPMOVW 4986
+VPMOVWBZ 4987
+VPMOVWBZmr 4988
+VPMOVWBZmrk 4989
+VPMOVWBZrr 4990
+VPMOVWBZrrk 4991
+VPMOVWBZrrkz 4992
+VPMOVZXBDYrm 4993
+VPMOVZXBDYrr 4994
+VPMOVZXBDZ 4995
+VPMOVZXBDZrm 4996
+VPMOVZXBDZrmk 4997
+VPMOVZXBDZrmkz 4998
+VPMOVZXBDZrr 4999
+VPMOVZXBDZrrk 5000
+VPMOVZXBDZrrkz 5001
+VPMOVZXBDrm 5002
+VPMOVZXBDrr 5003
+VPMOVZXBQYrm 5004
+VPMOVZXBQYrr 5005
+VPMOVZXBQZ 5006
+VPMOVZXBQZrm 5007
+VPMOVZXBQZrmk 5008
+VPMOVZXBQZrmkz 5009
+VPMOVZXBQZrr 5010
+VPMOVZXBQZrrk 5011
+VPMOVZXBQZrrkz 5012
+VPMOVZXBQrm 5013
+VPMOVZXBQrr 5014
+VPMOVZXBWYrm 5015
+VPMOVZXBWYrr 5016
+VPMOVZXBWZ 5017
+VPMOVZXBWZrm 5018
+VPMOVZXBWZrmk 5019
+VPMOVZXBWZrmkz 5020
+VPMOVZXBWZrr 5021
+VPMOVZXBWZrrk 5022
+VPMOVZXBWZrrkz 5023
+VPMOVZXBWrm 5024
+VPMOVZXBWrr 5025
+VPMOVZXDQYrm 5026
+VPMOVZXDQYrr 5027
+VPMOVZXDQZ 5028
+VPMOVZXDQZrm 5029
+VPMOVZXDQZrmk 5030
+VPMOVZXDQZrmkz 5031
+VPMOVZXDQZrr 5032
+VPMOVZXDQZrrk 5033
+VPMOVZXDQZrrkz 5034
+VPMOVZXDQrm 5035
+VPMOVZXDQrr 5036
+VPMOVZXWDYrm 5037
+VPMOVZXWDYrr 5038
+VPMOVZXWDZ 5039
+VPMOVZXWDZrm 5040
+VPMOVZXWDZrmk 5041
+VPMOVZXWDZrmkz 5042
+VPMOVZXWDZrr 5043
+VPMOVZXWDZrrk 5044
+VPMOVZXWDZrrkz 5045
+VPMOVZXWDrm 5046
+VPMOVZXWDrr 5047
+VPMOVZXWQYrm 5048
+VPMOVZXWQYrr 5049
+VPMOVZXWQZ 5050
+VPMOVZXWQZrm 5051
+VPMOVZXWQZrmk 5052
+VPMOVZXWQZrmkz 5053
+VPMOVZXWQZrr 5054
+VPMOVZXWQZrrk 5055
+VPMOVZXWQZrrkz 5056
+VPMOVZXWQrm 5057
+VPMOVZXWQrr 5058
+VPMULDQYrm 5059
+VPMULDQYrr 5060
+VPMULDQZ 5061
+VPMULDQZrm 5062
+VPMULDQZrmb 5063
+VPMULDQZrmbk 5064
+VPMULDQZrmbkz 5065
+VPMULDQZrmk 5066
+VPMULDQZrmkz 5067
+VPMULDQZrr 5068
+VPMULDQZrrk 5069
+VPMULDQZrrkz 5070
+VPMULDQrm 5071
+VPMULDQrr 5072
+VPMULHRSWYrm 5073
+VPMULHRSWYrr 5074
+VPMULHRSWZ 5075
+VPMULHRSWZrm 5076
+VPMULHRSWZrmk 5077
+VPMULHRSWZrmkz 5078
+VPMULHRSWZrr 5079
+VPMULHRSWZrrk 5080
+VPMULHRSWZrrkz 5081
+VPMULHRSWrm 5082
+VPMULHRSWrr 5083
+VPMULHUWYrm 5084
+VPMULHUWYrr 5085
+VPMULHUWZ 5086
+VPMULHUWZrm 5087
+VPMULHUWZrmk 5088
+VPMULHUWZrmkz 5089
+VPMULHUWZrr 5090
+VPMULHUWZrrk 5091
+VPMULHUWZrrkz 5092
+VPMULHUWrm 5093
+VPMULHUWrr 5094
+VPMULHWYrm 5095
+VPMULHWYrr 5096
+VPMULHWZ 5097
+VPMULHWZrm 5098
+VPMULHWZrmk 5099
+VPMULHWZrmkz 5100
+VPMULHWZrr 5101
+VPMULHWZrrk 5102
+VPMULHWZrrkz 5103
+VPMULHWrm 5104
+VPMULHWrr 5105
+VPMULLDYrm 5106
+VPMULLDYrr 5107
+VPMULLDZ 5108
+VPMULLDZrm 5109
+VPMULLDZrmb 5110
+VPMULLDZrmbk 5111
+VPMULLDZrmbkz 5112
+VPMULLDZrmk 5113
+VPMULLDZrmkz 5114
+VPMULLDZrr 5115
+VPMULLDZrrk 5116
+VPMULLDZrrkz 5117
+VPMULLDrm 5118
+VPMULLDrr 5119
+VPMULLQZ 5120
+VPMULLQZrm 5121
+VPMULLQZrmb 5122
+VPMULLQZrmbk 5123
+VPMULLQZrmbkz 5124
+VPMULLQZrmk 5125
+VPMULLQZrmkz 5126
+VPMULLQZrr 5127
+VPMULLQZrrk 5128
+VPMULLQZrrkz 5129
+VPMULLWYrm 5130
+VPMULLWYrr 5131
+VPMULLWZ 5132
+VPMULLWZrm 5133
+VPMULLWZrmk 5134
+VPMULLWZrmkz 5135
+VPMULLWZrr 5136
+VPMULLWZrrk 5137
+VPMULLWZrrkz 5138
+VPMULLWrm 5139
+VPMULLWrr 5140
+VPMULTISHIFTQBZ 5141
+VPMULTISHIFTQBZrm 5142
+VPMULTISHIFTQBZrmb 5143
+VPMULTISHIFTQBZrmbk 5144
+VPMULTISHIFTQBZrmbkz 5145
+VPMULTISHIFTQBZrmk 5146
+VPMULTISHIFTQBZrmkz 5147
+VPMULTISHIFTQBZrr 5148
+VPMULTISHIFTQBZrrk 5149
+VPMULTISHIFTQBZrrkz 5150
+VPMULUDQYrm 5151
+VPMULUDQYrr 5152
+VPMULUDQZ 5153
+VPMULUDQZrm 5154
+VPMULUDQZrmb 5155
+VPMULUDQZrmbk 5156
+VPMULUDQZrmbkz 5157
+VPMULUDQZrmk 5158
+VPMULUDQZrmkz 5159
+VPMULUDQZrr 5160
+VPMULUDQZrrk 5161
+VPMULUDQZrrkz 5162
+VPMULUDQrm 5163
+VPMULUDQrr 5164
+VPOPCNTBZ 5165
+VPOPCNTBZrm 5166
+VPOPCNTBZrmk 5167
+VPOPCNTBZrmkz 5168
+VPOPCNTBZrr 5169
+VPOPCNTBZrrk 5170
+VPOPCNTBZrrkz 5171
+VPOPCNTDZ 5172
+VPOPCNTDZrm 5173
+VPOPCNTDZrmb 5174
+VPOPCNTDZrmbk 5175
+VPOPCNTDZrmbkz 5176
+VPOPCNTDZrmk 5177
+VPOPCNTDZrmkz 5178
+VPOPCNTDZrr 5179
+VPOPCNTDZrrk 5180
+VPOPCNTDZrrkz 5181
+VPOPCNTQZ 5182
+VPOPCNTQZrm 5183
+VPOPCNTQZrmb 5184
+VPOPCNTQZrmbk 5185
+VPOPCNTQZrmbkz 5186
+VPOPCNTQZrmk 5187
+VPOPCNTQZrmkz 5188
+VPOPCNTQZrr 5189
+VPOPCNTQZrrk 5190
+VPOPCNTQZrrkz 5191
+VPOPCNTWZ 5192
+VPOPCNTWZrm 5193
+VPOPCNTWZrmk 5194
+VPOPCNTWZrmkz 5195
+VPOPCNTWZrr 5196
+VPOPCNTWZrrk 5197
+VPOPCNTWZrrkz 5198
+VPORDZ 5199
+VPORDZrm 5200
+VPORDZrmb 5201
+VPORDZrmbk 5202
+VPORDZrmbkz 5203
+VPORDZrmk 5204
+VPORDZrmkz 5205
+VPORDZrr 5206
+VPORDZrrk 5207
+VPORDZrrkz 5208
+VPORQZ 5209
+VPORQZrm 5210
+VPORQZrmb 5211
+VPORQZrmbk 5212
+VPORQZrmbkz 5213
+VPORQZrmk 5214
+VPORQZrmkz 5215
+VPORQZrr 5216
+VPORQZrrk 5217
+VPORQZrrkz 5218
+VPORYrm 5219
+VPORYrr 5220
+VPORrm 5221
+VPORrr 5222
+VPPERMrmr 5223
+VPPERMrrm 5224
+VPPERMrrr 5225
+VPPERMrrr_REV 5226
+VPROLDZ 5227
+VPROLDZmbi 5228
+VPROLDZmbik 5229
+VPROLDZmbikz 5230
+VPROLDZmi 5231
+VPROLDZmik 5232
+VPROLDZmikz 5233
+VPROLDZri 5234
+VPROLDZrik 5235
+VPROLDZrikz 5236
+VPROLQZ 5237
+VPROLQZmbi 5238
+VPROLQZmbik 5239
+VPROLQZmbikz 5240
+VPROLQZmi 5241
+VPROLQZmik 5242
+VPROLQZmikz 5243
+VPROLQZri 5244
+VPROLQZrik 5245
+VPROLQZrikz 5246
+VPROLVDZ 5247
+VPROLVDZrm 5248
+VPROLVDZrmb 5249
+VPROLVDZrmbk 5250
+VPROLVDZrmbkz 5251
+VPROLVDZrmk 5252
+VPROLVDZrmkz 5253
+VPROLVDZrr 5254
+VPROLVDZrrk 5255
+VPROLVDZrrkz 5256
+VPROLVQZ 5257
+VPROLVQZrm 5258
+VPROLVQZrmb 5259
+VPROLVQZrmbk 5260
+VPROLVQZrmbkz 5261
+VPROLVQZrmk 5262
+VPROLVQZrmkz 5263
+VPROLVQZrr 5264
+VPROLVQZrrk 5265
+VPROLVQZrrkz 5266
+VPRORDZ 5267
+VPRORDZmbi 5268
+VPRORDZmbik 5269
+VPRORDZmbikz 5270
+VPRORDZmi 5271
+VPRORDZmik 5272
+VPRORDZmikz 5273
+VPRORDZri 5274
+VPRORDZrik 5275
+VPRORDZrikz 5276
+VPRORQZ 5277
+VPRORQZmbi 5278
+VPRORQZmbik 5279
+VPRORQZmbikz 5280
+VPRORQZmi 5281
+VPRORQZmik 5282
+VPRORQZmikz 5283
+VPRORQZri 5284
+VPRORQZrik 5285
+VPRORQZrikz 5286
+VPRORVDZ 5287
+VPRORVDZrm 5288
+VPRORVDZrmb 5289
+VPRORVDZrmbk 5290
+VPRORVDZrmbkz 5291
+VPRORVDZrmk 5292
+VPRORVDZrmkz 5293
+VPRORVDZrr 5294
+VPRORVDZrrk 5295
+VPRORVDZrrkz 5296
+VPRORVQZ 5297
+VPRORVQZrm 5298
+VPRORVQZrmb 5299
+VPRORVQZrmbk 5300
+VPRORVQZrmbkz 5301
+VPRORVQZrmk 5302
+VPRORVQZrmkz 5303
+VPRORVQZrr 5304
+VPRORVQZrrk 5305
+VPRORVQZrrkz 5306
+VPROTBmi 5307
+VPROTBmr 5308
+VPROTBri 5309
+VPROTBrm 5310
+VPROTBrr 5311
+VPROTBrr_REV 5312
+VPROTDmi 5313
+VPROTDmr 5314
+VPROTDri 5315
+VPROTDrm 5316
+VPROTDrr 5317
+VPROTDrr_REV 5318
+VPROTQmi 5319
+VPROTQmr 5320
+VPROTQri 5321
+VPROTQrm 5322
+VPROTQrr 5323
+VPROTQrr_REV 5324
+VPROTWmi 5325
+VPROTWmr 5326
+VPROTWri 5327
+VPROTWrm 5328
+VPROTWrr 5329
+VPROTWrr_REV 5330
+VPSADBWYrm 5331
+VPSADBWYrr 5332
+VPSADBWZ 5333
+VPSADBWZrm 5334
+VPSADBWZrr 5335
+VPSADBWrm 5336
+VPSADBWrr 5337
+VPSCATTERDDZ 5338
+VPSCATTERDDZmr 5339
+VPSCATTERDQZ 5340
+VPSCATTERDQZmr 5341
+VPSCATTERQDZ 5342
+VPSCATTERQDZmr 5343
+VPSCATTERQQZ 5344
+VPSCATTERQQZmr 5345
+VPSHABmr 5346
+VPSHABrm 5347
+VPSHABrr 5348
+VPSHABrr_REV 5349
+VPSHADmr 5350
+VPSHADrm 5351
+VPSHADrr 5352
+VPSHADrr_REV 5353
+VPSHAQmr 5354
+VPSHAQrm 5355
+VPSHAQrr 5356
+VPSHAQrr_REV 5357
+VPSHAWmr 5358
+VPSHAWrm 5359
+VPSHAWrr 5360
+VPSHAWrr_REV 5361
+VPSHLBmr 5362
+VPSHLBrm 5363
+VPSHLBrr 5364
+VPSHLBrr_REV 5365
+VPSHLDDZ 5366
+VPSHLDDZrmbi 5367
+VPSHLDDZrmbik 5368
+VPSHLDDZrmbikz 5369
+VPSHLDDZrmi 5370
+VPSHLDDZrmik 5371
+VPSHLDDZrmikz 5372
+VPSHLDDZrri 5373
+VPSHLDDZrrik 5374
+VPSHLDDZrrikz 5375
+VPSHLDQZ 5376
+VPSHLDQZrmbi 5377
+VPSHLDQZrmbik 5378
+VPSHLDQZrmbikz 5379
+VPSHLDQZrmi 5380
+VPSHLDQZrmik 5381
+VPSHLDQZrmikz 5382
+VPSHLDQZrri 5383
+VPSHLDQZrrik 5384
+VPSHLDQZrrikz 5385
+VPSHLDVDZ 5386
+VPSHLDVDZm 5387
+VPSHLDVDZmb 5388
+VPSHLDVDZmbk 5389
+VPSHLDVDZmbkz 5390
+VPSHLDVDZmk 5391
+VPSHLDVDZmkz 5392
+VPSHLDVDZr 5393
+VPSHLDVDZrk 5394
+VPSHLDVDZrkz 5395
+VPSHLDVQZ 5396
+VPSHLDVQZm 5397
+VPSHLDVQZmb 5398
+VPSHLDVQZmbk 5399
+VPSHLDVQZmbkz 5400
+VPSHLDVQZmk 5401
+VPSHLDVQZmkz 5402
+VPSHLDVQZr 5403
+VPSHLDVQZrk 5404
+VPSHLDVQZrkz 5405
+VPSHLDVWZ 5406
+VPSHLDVWZm 5407
+VPSHLDVWZmk 5408
+VPSHLDVWZmkz 5409
+VPSHLDVWZr 5410
+VPSHLDVWZrk 5411
+VPSHLDVWZrkz 5412
+VPSHLDWZ 5413
+VPSHLDWZrmi 5414
+VPSHLDWZrmik 5415
+VPSHLDWZrmikz 5416
+VPSHLDWZrri 5417
+VPSHLDWZrrik 5418
+VPSHLDWZrrikz 5419
+VPSHLDmr 5420
+VPSHLDrm 5421
+VPSHLDrr 5422
+VPSHLDrr_REV 5423
+VPSHLQmr 5424
+VPSHLQrm 5425
+VPSHLQrr 5426
+VPSHLQrr_REV 5427
+VPSHLWmr 5428
+VPSHLWrm 5429
+VPSHLWrr 5430
+VPSHLWrr_REV 5431
+VPSHRDDZ 5432
+VPSHRDDZrmbi 5433
+VPSHRDDZrmbik 5434
+VPSHRDDZrmbikz 5435
+VPSHRDDZrmi 5436
+VPSHRDDZrmik 5437
+VPSHRDDZrmikz 5438
+VPSHRDDZrri 5439
+VPSHRDDZrrik 5440
+VPSHRDDZrrikz 5441
+VPSHRDQZ 5442
+VPSHRDQZrmbi 5443
+VPSHRDQZrmbik 5444
+VPSHRDQZrmbikz 5445
+VPSHRDQZrmi 5446
+VPSHRDQZrmik 5447
+VPSHRDQZrmikz 5448
+VPSHRDQZrri 5449
+VPSHRDQZrrik 5450
+VPSHRDQZrrikz 5451
+VPSHRDVDZ 5452
+VPSHRDVDZm 5453
+VPSHRDVDZmb 5454
+VPSHRDVDZmbk 5455
+VPSHRDVDZmbkz 5456
+VPSHRDVDZmk 5457
+VPSHRDVDZmkz 5458
+VPSHRDVDZr 5459
+VPSHRDVDZrk 5460
+VPSHRDVDZrkz 5461
+VPSHRDVQZ 5462
+VPSHRDVQZm 5463
+VPSHRDVQZmb 5464
+VPSHRDVQZmbk 5465
+VPSHRDVQZmbkz 5466
+VPSHRDVQZmk 5467
+VPSHRDVQZmkz 5468
+VPSHRDVQZr 5469
+VPSHRDVQZrk 5470
+VPSHRDVQZrkz 5471
+VPSHRDVWZ 5472
+VPSHRDVWZm 5473
+VPSHRDVWZmk 5474
+VPSHRDVWZmkz 5475
+VPSHRDVWZr 5476
+VPSHRDVWZrk 5477
+VPSHRDVWZrkz 5478
+VPSHRDWZ 5479
+VPSHRDWZrmi 5480
+VPSHRDWZrmik 5481
+VPSHRDWZrmikz 5482
+VPSHRDWZrri 5483
+VPSHRDWZrrik 5484
+VPSHRDWZrrikz 5485
+VPSHUFBITQMBZ 5486
+VPSHUFBITQMBZrm 5487
+VPSHUFBITQMBZrmk 5488
+VPSHUFBITQMBZrr 5489
+VPSHUFBITQMBZrrk 5490
+VPSHUFBYrm 5491
+VPSHUFBYrr 5492
+VPSHUFBZ 5493
+VPSHUFBZrm 5494
+VPSHUFBZrmk 5495
+VPSHUFBZrmkz 5496
+VPSHUFBZrr 5497
+VPSHUFBZrrk 5498
+VPSHUFBZrrkz 5499
+VPSHUFBrm 5500
+VPSHUFBrr 5501
+VPSHUFDYmi 5502
+VPSHUFDYri 5503
+VPSHUFDZ 5504
+VPSHUFDZmbi 5505
+VPSHUFDZmbik 5506
+VPSHUFDZmbikz 5507
+VPSHUFDZmi 5508
+VPSHUFDZmik 5509
+VPSHUFDZmikz 5510
+VPSHUFDZri 5511
+VPSHUFDZrik 5512
+VPSHUFDZrikz 5513
+VPSHUFDmi 5514
+VPSHUFDri 5515
+VPSHUFHWYmi 5516
+VPSHUFHWYri 5517
+VPSHUFHWZ 5518
+VPSHUFHWZmi 5519
+VPSHUFHWZmik 5520
+VPSHUFHWZmikz 5521
+VPSHUFHWZri 5522
+VPSHUFHWZrik 5523
+VPSHUFHWZrikz 5524
+VPSHUFHWmi 5525
+VPSHUFHWri 5526
+VPSHUFLWYmi 5527
+VPSHUFLWYri 5528
+VPSHUFLWZ 5529
+VPSHUFLWZmi 5530
+VPSHUFLWZmik 5531
+VPSHUFLWZmikz 5532
+VPSHUFLWZri 5533
+VPSHUFLWZrik 5534
+VPSHUFLWZrikz 5535
+VPSHUFLWmi 5536
+VPSHUFLWri 5537
+VPSIGNBYrm 5538
+VPSIGNBYrr 5539
+VPSIGNBrm 5540
+VPSIGNBrr 5541
+VPSIGNDYrm 5542
+VPSIGNDYrr 5543
+VPSIGNDrm 5544
+VPSIGNDrr 5545
+VPSIGNWYrm 5546
+VPSIGNWYrr 5547
+VPSIGNWrm 5548
+VPSIGNWrr 5549
+VPSLLDQYri 5550
+VPSLLDQZ 5551
+VPSLLDQZmi 5552
+VPSLLDQZri 5553
+VPSLLDQri 5554
+VPSLLDYri 5555
+VPSLLDYrm 5556
+VPSLLDYrr 5557
+VPSLLDZ 5558
+VPSLLDZmbi 5559
+VPSLLDZmbik 5560
+VPSLLDZmbikz 5561
+VPSLLDZmi 5562
+VPSLLDZmik 5563
+VPSLLDZmikz 5564
+VPSLLDZri 5565
+VPSLLDZrik 5566
+VPSLLDZrikz 5567
+VPSLLDZrm 5568
+VPSLLDZrmk 5569
+VPSLLDZrmkz 5570
+VPSLLDZrr 5571
+VPSLLDZrrk 5572
+VPSLLDZrrkz 5573
+VPSLLDri 5574
+VPSLLDrm 5575
+VPSLLDrr 5576
+VPSLLQYri 5577
+VPSLLQYrm 5578
+VPSLLQYrr 5579
+VPSLLQZ 5580
+VPSLLQZmbi 5581
+VPSLLQZmbik 5582
+VPSLLQZmbikz 5583
+VPSLLQZmi 5584
+VPSLLQZmik 5585
+VPSLLQZmikz 5586
+VPSLLQZri 5587
+VPSLLQZrik 5588
+VPSLLQZrikz 5589
+VPSLLQZrm 5590
+VPSLLQZrmk 5591
+VPSLLQZrmkz 5592
+VPSLLQZrr 5593
+VPSLLQZrrk 5594
+VPSLLQZrrkz 5595
+VPSLLQri 5596
+VPSLLQrm 5597
+VPSLLQrr 5598
+VPSLLVDYrm 5599
+VPSLLVDYrr 5600
+VPSLLVDZ 5601
+VPSLLVDZrm 5602
+VPSLLVDZrmb 5603
+VPSLLVDZrmbk 5604
+VPSLLVDZrmbkz 5605
+VPSLLVDZrmk 5606
+VPSLLVDZrmkz 5607
+VPSLLVDZrr 5608
+VPSLLVDZrrk 5609
+VPSLLVDZrrkz 5610
+VPSLLVDrm 5611
+VPSLLVDrr 5612
+VPSLLVQYrm 5613
+VPSLLVQYrr 5614
+VPSLLVQZ 5615
+VPSLLVQZrm 5616
+VPSLLVQZrmb 5617
+VPSLLVQZrmbk 5618
+VPSLLVQZrmbkz 5619
+VPSLLVQZrmk 5620
+VPSLLVQZrmkz 5621
+VPSLLVQZrr 5622
+VPSLLVQZrrk 5623
+VPSLLVQZrrkz 5624
+VPSLLVQrm 5625
+VPSLLVQrr 5626
+VPSLLVWZ 5627
+VPSLLVWZrm 5628
+VPSLLVWZrmk 5629
+VPSLLVWZrmkz 5630
+VPSLLVWZrr 5631
+VPSLLVWZrrk 5632
+VPSLLVWZrrkz 5633
+VPSLLWYri 5634
+VPSLLWYrm 5635
+VPSLLWYrr 5636
+VPSLLWZ 5637
+VPSLLWZmi 5638
+VPSLLWZmik 5639
+VPSLLWZmikz 5640
+VPSLLWZri 5641
+VPSLLWZrik 5642
+VPSLLWZrikz 5643
+VPSLLWZrm 5644
+VPSLLWZrmk 5645
+VPSLLWZrmkz 5646
+VPSLLWZrr 5647
+VPSLLWZrrk 5648
+VPSLLWZrrkz 5649
+VPSLLWri 5650
+VPSLLWrm 5651
+VPSLLWrr 5652
+VPSRADYri 5653
+VPSRADYrm 5654
+VPSRADYrr 5655
+VPSRADZ 5656
+VPSRADZmbi 5657
+VPSRADZmbik 5658
+VPSRADZmbikz 5659
+VPSRADZmi 5660
+VPSRADZmik 5661
+VPSRADZmikz 5662
+VPSRADZri 5663
+VPSRADZrik 5664
+VPSRADZrikz 5665
+VPSRADZrm 5666
+VPSRADZrmk 5667
+VPSRADZrmkz 5668
+VPSRADZrr 5669
+VPSRADZrrk 5670
+VPSRADZrrkz 5671
+VPSRADri 5672
+VPSRADrm 5673
+VPSRADrr 5674
+VPSRAQZ 5675
+VPSRAQZmbi 5676
+VPSRAQZmbik 5677
+VPSRAQZmbikz 5678
+VPSRAQZmi 5679
+VPSRAQZmik 5680
+VPSRAQZmikz 5681
+VPSRAQZri 5682
+VPSRAQZrik 5683
+VPSRAQZrikz 5684
+VPSRAQZrm 5685
+VPSRAQZrmk 5686
+VPSRAQZrmkz 5687
+VPSRAQZrr 5688
+VPSRAQZrrk 5689
+VPSRAQZrrkz 5690
+VPSRAVDYrm 5691
+VPSRAVDYrr 5692
+VPSRAVDZ 5693
+VPSRAVDZrm 5694
+VPSRAVDZrmb 5695
+VPSRAVDZrmbk 5696
+VPSRAVDZrmbkz 5697
+VPSRAVDZrmk 5698
+VPSRAVDZrmkz 5699
+VPSRAVDZrr 5700
+VPSRAVDZrrk 5701
+VPSRAVDZrrkz 5702
+VPSRAVDrm 5703
+VPSRAVDrr 5704
+VPSRAVQZ 5705
+VPSRAVQZrm 5706
+VPSRAVQZrmb 5707
+VPSRAVQZrmbk 5708
+VPSRAVQZrmbkz 5709
+VPSRAVQZrmk 5710
+VPSRAVQZrmkz 5711
+VPSRAVQZrr 5712
+VPSRAVQZrrk 5713
+VPSRAVQZrrkz 5714
+VPSRAVWZ 5715
+VPSRAVWZrm 5716
+VPSRAVWZrmk 5717
+VPSRAVWZrmkz 5718
+VPSRAVWZrr 5719
+VPSRAVWZrrk 5720
+VPSRAVWZrrkz 5721
+VPSRAWYri 5722
+VPSRAWYrm 5723
+VPSRAWYrr 5724
+VPSRAWZ 5725
+VPSRAWZmi 5726
+VPSRAWZmik 5727
+VPSRAWZmikz 5728
+VPSRAWZri 5729
+VPSRAWZrik 5730
+VPSRAWZrikz 5731
+VPSRAWZrm 5732
+VPSRAWZrmk 5733
+VPSRAWZrmkz 5734
+VPSRAWZrr 5735
+VPSRAWZrrk 5736
+VPSRAWZrrkz 5737
+VPSRAWri 5738
+VPSRAWrm 5739
+VPSRAWrr 5740
+VPSRLDQYri 5741
+VPSRLDQZ 5742
+VPSRLDQZmi 5743
+VPSRLDQZri 5744
+VPSRLDQri 5745
+VPSRLDYri 5746
+VPSRLDYrm 5747
+VPSRLDYrr 5748
+VPSRLDZ 5749
+VPSRLDZmbi 5750
+VPSRLDZmbik 5751
+VPSRLDZmbikz 5752
+VPSRLDZmi 5753
+VPSRLDZmik 5754
+VPSRLDZmikz 5755
+VPSRLDZri 5756
+VPSRLDZrik 5757
+VPSRLDZrikz 5758
+VPSRLDZrm 5759
+VPSRLDZrmk 5760
+VPSRLDZrmkz 5761
+VPSRLDZrr 5762
+VPSRLDZrrk 5763
+VPSRLDZrrkz 5764
+VPSRLDri 5765
+VPSRLDrm 5766
+VPSRLDrr 5767
+VPSRLQYri 5768
+VPSRLQYrm 5769
+VPSRLQYrr 5770
+VPSRLQZ 5771
+VPSRLQZmbi 5772
+VPSRLQZmbik 5773
+VPSRLQZmbikz 5774
+VPSRLQZmi 5775
+VPSRLQZmik 5776
+VPSRLQZmikz 5777
+VPSRLQZri 5778
+VPSRLQZrik 5779
+VPSRLQZrikz 5780
+VPSRLQZrm 5781
+VPSRLQZrmk 5782
+VPSRLQZrmkz 5783
+VPSRLQZrr 5784
+VPSRLQZrrk 5785
+VPSRLQZrrkz 5786
+VPSRLQri 5787
+VPSRLQrm 5788
+VPSRLQrr 5789
+VPSRLVDYrm 5790
+VPSRLVDYrr 5791
+VPSRLVDZ 5792
+VPSRLVDZrm 5793
+VPSRLVDZrmb 5794
+VPSRLVDZrmbk 5795
+VPSRLVDZrmbkz 5796
+VPSRLVDZrmk 5797
+VPSRLVDZrmkz 5798
+VPSRLVDZrr 5799
+VPSRLVDZrrk 5800
+VPSRLVDZrrkz 5801
+VPSRLVDrm 5802
+VPSRLVDrr 5803
+VPSRLVQYrm 5804
+VPSRLVQYrr 5805
+VPSRLVQZ 5806
+VPSRLVQZrm 5807
+VPSRLVQZrmb 5808
+VPSRLVQZrmbk 5809
+VPSRLVQZrmbkz 5810
+VPSRLVQZrmk 5811
+VPSRLVQZrmkz 5812
+VPSRLVQZrr 5813
+VPSRLVQZrrk 5814
+VPSRLVQZrrkz 5815
+VPSRLVQrm 5816
+VPSRLVQrr 5817
+VPSRLVWZ 5818
+VPSRLVWZrm 5819
+VPSRLVWZrmk 5820
+VPSRLVWZrmkz 5821
+VPSRLVWZrr 5822
+VPSRLVWZrrk 5823
+VPSRLVWZrrkz 5824
+VPSRLWYri 5825
+VPSRLWYrm 5826
+VPSRLWYrr 5827
+VPSRLWZ 5828
+VPSRLWZmi 5829
+VPSRLWZmik 5830
+VPSRLWZmikz 5831
+VPSRLWZri 5832
+VPSRLWZrik 5833
+VPSRLWZrikz 5834
+VPSRLWZrm 5835
+VPSRLWZrmk 5836
+VPSRLWZrmkz 5837
+VPSRLWZrr 5838
+VPSRLWZrrk 5839
+VPSRLWZrrkz 5840
+VPSRLWri 5841
+VPSRLWrm 5842
+VPSRLWrr 5843
+VPSUBBYrm 5844
+VPSUBBYrr 5845
+VPSUBBZ 5846
+VPSUBBZrm 5847
+VPSUBBZrmk 5848
+VPSUBBZrmkz 5849
+VPSUBBZrr 5850
+VPSUBBZrrk 5851
+VPSUBBZrrkz 5852
+VPSUBBrm 5853
+VPSUBBrr 5854
+VPSUBDYrm 5855
+VPSUBDYrr 5856
+VPSUBDZ 5857
+VPSUBDZrm 5858
+VPSUBDZrmb 5859
+VPSUBDZrmbk 5860
+VPSUBDZrmbkz 5861
+VPSUBDZrmk 5862
+VPSUBDZrmkz 5863
+VPSUBDZrr 5864
+VPSUBDZrrk 5865
+VPSUBDZrrkz 5866
+VPSUBDrm 5867
+VPSUBDrr 5868
+VPSUBQYrm 5869
+VPSUBQYrr 5870
+VPSUBQZ 5871
+VPSUBQZrm 5872
+VPSUBQZrmb 5873
+VPSUBQZrmbk 5874
+VPSUBQZrmbkz 5875
+VPSUBQZrmk 5876
+VPSUBQZrmkz 5877
+VPSUBQZrr 5878
+VPSUBQZrrk 5879
+VPSUBQZrrkz 5880
+VPSUBQrm 5881
+VPSUBQrr 5882
+VPSUBSBYrm 5883
+VPSUBSBYrr 5884
+VPSUBSBZ 5885
+VPSUBSBZrm 5886
+VPSUBSBZrmk 5887
+VPSUBSBZrmkz 5888
+VPSUBSBZrr 5889
+VPSUBSBZrrk 5890
+VPSUBSBZrrkz 5891
+VPSUBSBrm 5892
+VPSUBSBrr 5893
+VPSUBSWYrm 5894
+VPSUBSWYrr 5895
+VPSUBSWZ 5896
+VPSUBSWZrm 5897
+VPSUBSWZrmk 5898
+VPSUBSWZrmkz 5899
+VPSUBSWZrr 5900
+VPSUBSWZrrk 5901
+VPSUBSWZrrkz 5902
+VPSUBSWrm 5903
+VPSUBSWrr 5904
+VPSUBUSBYrm 5905
+VPSUBUSBYrr 5906
+VPSUBUSBZ 5907
+VPSUBUSBZrm 5908
+VPSUBUSBZrmk 5909
+VPSUBUSBZrmkz 5910
+VPSUBUSBZrr 5911
+VPSUBUSBZrrk 5912
+VPSUBUSBZrrkz 5913
+VPSUBUSBrm 5914
+VPSUBUSBrr 5915
+VPSUBUSWYrm 5916
+VPSUBUSWYrr 5917
+VPSUBUSWZ 5918
+VPSUBUSWZrm 5919
+VPSUBUSWZrmk 5920
+VPSUBUSWZrmkz 5921
+VPSUBUSWZrr 5922
+VPSUBUSWZrrk 5923
+VPSUBUSWZrrkz 5924
+VPSUBUSWrm 5925
+VPSUBUSWrr 5926
+VPSUBWYrm 5927
+VPSUBWYrr 5928
+VPSUBWZ 5929
+VPSUBWZrm 5930
+VPSUBWZrmk 5931
+VPSUBWZrmkz 5932
+VPSUBWZrr 5933
+VPSUBWZrrk 5934
+VPSUBWZrrkz 5935
+VPSUBWrm 5936
+VPSUBWrr 5937
+VPTERNLOGDZ 5938
+VPTERNLOGDZrmbi 5939
+VPTERNLOGDZrmbik 5940
+VPTERNLOGDZrmbikz 5941
+VPTERNLOGDZrmi 5942
+VPTERNLOGDZrmik 5943
+VPTERNLOGDZrmikz 5944
+VPTERNLOGDZrri 5945
+VPTERNLOGDZrrik 5946
+VPTERNLOGDZrrikz 5947
+VPTERNLOGQZ 5948
+VPTERNLOGQZrmbi 5949
+VPTERNLOGQZrmbik 5950
+VPTERNLOGQZrmbikz 5951
+VPTERNLOGQZrmi 5952
+VPTERNLOGQZrmik 5953
+VPTERNLOGQZrmikz 5954
+VPTERNLOGQZrri 5955
+VPTERNLOGQZrrik 5956
+VPTERNLOGQZrrikz 5957
+VPTESTMBZ 5958
+VPTESTMBZrm 5959
+VPTESTMBZrmk 5960
+VPTESTMBZrr 5961
+VPTESTMBZrrk 5962
+VPTESTMDZ 5963
+VPTESTMDZrm 5964
+VPTESTMDZrmb 5965
+VPTESTMDZrmbk 5966
+VPTESTMDZrmk 5967
+VPTESTMDZrr 5968
+VPTESTMDZrrk 5969
+VPTESTMQZ 5970
+VPTESTMQZrm 5971
+VPTESTMQZrmb 5972
+VPTESTMQZrmbk 5973
+VPTESTMQZrmk 5974
+VPTESTMQZrr 5975
+VPTESTMQZrrk 5976
+VPTESTMWZ 5977
+VPTESTMWZrm 5978
+VPTESTMWZrmk 5979
+VPTESTMWZrr 5980
+VPTESTMWZrrk 5981
+VPTESTNMBZ 5982
+VPTESTNMBZrm 5983
+VPTESTNMBZrmk 5984
+VPTESTNMBZrr 5985
+VPTESTNMBZrrk 5986
+VPTESTNMDZ 5987
+VPTESTNMDZrm 5988
+VPTESTNMDZrmb 5989
+VPTESTNMDZrmbk 5990
+VPTESTNMDZrmk 5991
+VPTESTNMDZrr 5992
+VPTESTNMDZrrk 5993
+VPTESTNMQZ 5994
+VPTESTNMQZrm 5995
+VPTESTNMQZrmb 5996
+VPTESTNMQZrmbk 5997
+VPTESTNMQZrmk 5998
+VPTESTNMQZrr 5999
+VPTESTNMQZrrk 6000
+VPTESTNMWZ 6001
+VPTESTNMWZrm 6002
+VPTESTNMWZrmk 6003
+VPTESTNMWZrr 6004
+VPTESTNMWZrrk 6005
+VPTESTYrm 6006
+VPTESTYrr 6007
+VPTESTrm 6008
+VPTESTrr 6009
+VPUNPCKHBWYrm 6010
+VPUNPCKHBWYrr 6011
+VPUNPCKHBWZ 6012
+VPUNPCKHBWZrm 6013
+VPUNPCKHBWZrmk 6014
+VPUNPCKHBWZrmkz 6015
+VPUNPCKHBWZrr 6016
+VPUNPCKHBWZrrk 6017
+VPUNPCKHBWZrrkz 6018
+VPUNPCKHBWrm 6019
+VPUNPCKHBWrr 6020
+VPUNPCKHDQYrm 6021
+VPUNPCKHDQYrr 6022
+VPUNPCKHDQZ 6023
+VPUNPCKHDQZrm 6024
+VPUNPCKHDQZrmb 6025
+VPUNPCKHDQZrmbk 6026
+VPUNPCKHDQZrmbkz 6027
+VPUNPCKHDQZrmk 6028
+VPUNPCKHDQZrmkz 6029
+VPUNPCKHDQZrr 6030
+VPUNPCKHDQZrrk 6031
+VPUNPCKHDQZrrkz 6032
+VPUNPCKHDQrm 6033
+VPUNPCKHDQrr 6034
+VPUNPCKHQDQYrm 6035
+VPUNPCKHQDQYrr 6036
+VPUNPCKHQDQZ 6037
+VPUNPCKHQDQZrm 6038
+VPUNPCKHQDQZrmb 6039
+VPUNPCKHQDQZrmbk 6040
+VPUNPCKHQDQZrmbkz 6041
+VPUNPCKHQDQZrmk 6042
+VPUNPCKHQDQZrmkz 6043
+VPUNPCKHQDQZrr 6044
+VPUNPCKHQDQZrrk 6045
+VPUNPCKHQDQZrrkz 6046
+VPUNPCKHQDQrm 6047
+VPUNPCKHQDQrr 6048
+VPUNPCKHWDYrm 6049
+VPUNPCKHWDYrr 6050
+VPUNPCKHWDZ 6051
+VPUNPCKHWDZrm 6052
+VPUNPCKHWDZrmk 6053
+VPUNPCKHWDZrmkz 6054
+VPUNPCKHWDZrr 6055
+VPUNPCKHWDZrrk 6056
+VPUNPCKHWDZrrkz 6057
+VPUNPCKHWDrm 6058
+VPUNPCKHWDrr 6059
+VPUNPCKLBWYrm 6060
+VPUNPCKLBWYrr 6061
+VPUNPCKLBWZ 6062
+VPUNPCKLBWZrm 6063
+VPUNPCKLBWZrmk 6064
+VPUNPCKLBWZrmkz 6065
+VPUNPCKLBWZrr 6066
+VPUNPCKLBWZrrk 6067
+VPUNPCKLBWZrrkz 6068
+VPUNPCKLBWrm 6069
+VPUNPCKLBWrr 6070
+VPUNPCKLDQYrm 6071
+VPUNPCKLDQYrr 6072
+VPUNPCKLDQZ 6073
+VPUNPCKLDQZrm 6074
+VPUNPCKLDQZrmb 6075
+VPUNPCKLDQZrmbk 6076
+VPUNPCKLDQZrmbkz 6077
+VPUNPCKLDQZrmk 6078
+VPUNPCKLDQZrmkz 6079
+VPUNPCKLDQZrr 6080
+VPUNPCKLDQZrrk 6081
+VPUNPCKLDQZrrkz 6082
+VPUNPCKLDQrm 6083
+VPUNPCKLDQrr 6084
+VPUNPCKLQDQYrm 6085
+VPUNPCKLQDQYrr 6086
+VPUNPCKLQDQZ 6087
+VPUNPCKLQDQZrm 6088
+VPUNPCKLQDQZrmb 6089
+VPUNPCKLQDQZrmbk 6090
+VPUNPCKLQDQZrmbkz 6091
+VPUNPCKLQDQZrmk 6092
+VPUNPCKLQDQZrmkz 6093
+VPUNPCKLQDQZrr 6094
+VPUNPCKLQDQZrrk 6095
+VPUNPCKLQDQZrrkz 6096
+VPUNPCKLQDQrm 6097
+VPUNPCKLQDQrr 6098
+VPUNPCKLWDYrm 6099
+VPUNPCKLWDYrr 6100
+VPUNPCKLWDZ 6101
+VPUNPCKLWDZrm 6102
+VPUNPCKLWDZrmk 6103
+VPUNPCKLWDZrmkz 6104
+VPUNPCKLWDZrr 6105
+VPUNPCKLWDZrrk 6106
+VPUNPCKLWDZrrkz 6107
+VPUNPCKLWDrm 6108
+VPUNPCKLWDrr 6109
+VPXORDZ 6110
+VPXORDZrm 6111
+VPXORDZrmb 6112
+VPXORDZrmbk 6113
+VPXORDZrmbkz 6114
+VPXORDZrmk 6115
+VPXORDZrmkz 6116
+VPXORDZrr 6117
+VPXORDZrrk 6118
+VPXORDZrrkz 6119
+VPXORQZ 6120
+VPXORQZrm 6121
+VPXORQZrmb 6122
+VPXORQZrmbk 6123
+VPXORQZrmbkz 6124
+VPXORQZrmk 6125
+VPXORQZrmkz 6126
+VPXORQZrr 6127
+VPXORQZrrk 6128
+VPXORQZrrkz 6129
+VPXORYrm 6130
+VPXORYrr 6131
+VPXORrm 6132
+VPXORrr 6133
+VRANGEPDZ 6134
+VRANGEPDZrmbi 6135
+VRANGEPDZrmbik 6136
+VRANGEPDZrmbikz 6137
+VRANGEPDZrmi 6138
+VRANGEPDZrmik 6139
+VRANGEPDZrmikz 6140
+VRANGEPDZrri 6141
+VRANGEPDZrrib 6142
+VRANGEPDZrribk 6143
+VRANGEPDZrribkz 6144
+VRANGEPDZrrik 6145
+VRANGEPDZrrikz 6146
+VRANGEPSZ 6147
+VRANGEPSZrmbi 6148
+VRANGEPSZrmbik 6149
+VRANGEPSZrmbikz 6150
+VRANGEPSZrmi 6151
+VRANGEPSZrmik 6152
+VRANGEPSZrmikz 6153
+VRANGEPSZrri 6154
+VRANGEPSZrrib 6155
+VRANGEPSZrribk 6156
+VRANGEPSZrribkz 6157
+VRANGEPSZrrik 6158
+VRANGEPSZrrikz 6159
+VRANGESDZrmi 6160
+VRANGESDZrmik 6161
+VRANGESDZrmikz 6162
+VRANGESDZrri 6163
+VRANGESDZrrib 6164
+VRANGESDZrribk 6165
+VRANGESDZrribkz 6166
+VRANGESDZrrik 6167
+VRANGESDZrrikz 6168
+VRANGESSZrmi 6169
+VRANGESSZrmik 6170
+VRANGESSZrmikz 6171
+VRANGESSZrri 6172
+VRANGESSZrrib 6173
+VRANGESSZrribk 6174
+VRANGESSZrribkz 6175
+VRANGESSZrrik 6176
+VRANGESSZrrikz 6177
+VRCP 6178
+VRCPBF 6179
+VRCPPHZ 6180
+VRCPPHZm 6181
+VRCPPHZmb 6182
+VRCPPHZmbk 6183
+VRCPPHZmbkz 6184
+VRCPPHZmk 6185
+VRCPPHZmkz 6186
+VRCPPHZr 6187
+VRCPPHZrk 6188
+VRCPPHZrkz 6189
+VRCPPSYm 6190
+VRCPPSYr 6191
+VRCPPSm 6192
+VRCPPSr 6193
+VRCPSHZrm 6194
+VRCPSHZrmk 6195
+VRCPSHZrmkz 6196
+VRCPSHZrr 6197
+VRCPSHZrrk 6198
+VRCPSHZrrkz 6199
+VRCPSSm 6200
+VRCPSSm_Int 6201
+VRCPSSr 6202
+VRCPSSr_Int 6203
+VREDUCEBF 6204
+VREDUCEPDZ 6205
+VREDUCEPDZrmbi 6206
+VREDUCEPDZrmbik 6207
+VREDUCEPDZrmbikz 6208
+VREDUCEPDZrmi 6209
+VREDUCEPDZrmik 6210
+VREDUCEPDZrmikz 6211
+VREDUCEPDZrri 6212
+VREDUCEPDZrrib 6213
+VREDUCEPDZrribk 6214
+VREDUCEPDZrribkz 6215
+VREDUCEPDZrrik 6216
+VREDUCEPDZrrikz 6217
+VREDUCEPHZ 6218
+VREDUCEPHZrmbi 6219
+VREDUCEPHZrmbik 6220
+VREDUCEPHZrmbikz 6221
+VREDUCEPHZrmi 6222
+VREDUCEPHZrmik 6223
+VREDUCEPHZrmikz 6224
+VREDUCEPHZrri 6225
+VREDUCEPHZrrib 6226
+VREDUCEPHZrribk 6227
+VREDUCEPHZrribkz 6228
+VREDUCEPHZrrik 6229
+VREDUCEPHZrrikz 6230
+VREDUCEPSZ 6231
+VREDUCEPSZrmbi 6232
+VREDUCEPSZrmbik 6233
+VREDUCEPSZrmbikz 6234
+VREDUCEPSZrmi 6235
+VREDUCEPSZrmik 6236
+VREDUCEPSZrmikz 6237
+VREDUCEPSZrri 6238
+VREDUCEPSZrrib 6239
+VREDUCEPSZrribk 6240
+VREDUCEPSZrribkz 6241
+VREDUCEPSZrrik 6242
+VREDUCEPSZrrikz 6243
+VREDUCESDZrmi 6244
+VREDUCESDZrmik 6245
+VREDUCESDZrmikz 6246
+VREDUCESDZrri 6247
+VREDUCESDZrrib 6248
+VREDUCESDZrribk 6249
+VREDUCESDZrribkz 6250
+VREDUCESDZrrik 6251
+VREDUCESDZrrikz 6252
+VREDUCESHZrmi 6253
+VREDUCESHZrmik 6254
+VREDUCESHZrmikz 6255
+VREDUCESHZrri 6256
+VREDUCESHZrrib 6257
+VREDUCESHZrribk 6258
+VREDUCESHZrribkz 6259
+VREDUCESHZrrik 6260
+VREDUCESHZrrikz 6261
+VREDUCESSZrmi 6262
+VREDUCESSZrmik 6263
+VREDUCESSZrmikz 6264
+VREDUCESSZrri 6265
+VREDUCESSZrrib 6266
+VREDUCESSZrribk 6267
+VREDUCESSZrribkz 6268
+VREDUCESSZrrik 6269
+VREDUCESSZrrikz 6270
+VRNDSCALEBF 6271
+VRNDSCALEPDZ 6272
+VRNDSCALEPDZrmbi 6273
+VRNDSCALEPDZrmbik 6274
+VRNDSCALEPDZrmbikz 6275
+VRNDSCALEPDZrmi 6276
+VRNDSCALEPDZrmik 6277
+VRNDSCALEPDZrmikz 6278
+VRNDSCALEPDZrri 6279
+VRNDSCALEPDZrrib 6280
+VRNDSCALEPDZrribk 6281
+VRNDSCALEPDZrribkz 6282
+VRNDSCALEPDZrrik 6283
+VRNDSCALEPDZrrikz 6284
+VRNDSCALEPHZ 6285
+VRNDSCALEPHZrmbi 6286
+VRNDSCALEPHZrmbik 6287
+VRNDSCALEPHZrmbikz 6288
+VRNDSCALEPHZrmi 6289
+VRNDSCALEPHZrmik 6290
+VRNDSCALEPHZrmikz 6291
+VRNDSCALEPHZrri 6292
+VRNDSCALEPHZrrib 6293
+VRNDSCALEPHZrribk 6294
+VRNDSCALEPHZrribkz 6295
+VRNDSCALEPHZrrik 6296
+VRNDSCALEPHZrrikz 6297
+VRNDSCALEPSZ 6298
+VRNDSCALEPSZrmbi 6299
+VRNDSCALEPSZrmbik 6300
+VRNDSCALEPSZrmbikz 6301
+VRNDSCALEPSZrmi 6302
+VRNDSCALEPSZrmik 6303
+VRNDSCALEPSZrmikz 6304
+VRNDSCALEPSZrri 6305
+VRNDSCALEPSZrrib 6306
+VRNDSCALEPSZrribk 6307
+VRNDSCALEPSZrribkz 6308
+VRNDSCALEPSZrrik 6309
+VRNDSCALEPSZrrikz 6310
+VRNDSCALESDZrmi 6311
+VRNDSCALESDZrmi_Int 6312
+VRNDSCALESDZrmik_Int 6313
+VRNDSCALESDZrmikz_Int 6314
+VRNDSCALESDZrri 6315
+VRNDSCALESDZrri_Int 6316
+VRNDSCALESDZrrib_Int 6317
+VRNDSCALESDZrribk_Int 6318
+VRNDSCALESDZrribkz_Int 6319
+VRNDSCALESDZrrik_Int 6320
+VRNDSCALESDZrrikz_Int 6321
+VRNDSCALESHZrmi 6322
+VRNDSCALESHZrmi_Int 6323
+VRNDSCALESHZrmik_Int 6324
+VRNDSCALESHZrmikz_Int 6325
+VRNDSCALESHZrri 6326
+VRNDSCALESHZrri_Int 6327
+VRNDSCALESHZrrib_Int 6328
+VRNDSCALESHZrribk_Int 6329
+VRNDSCALESHZrribkz_Int 6330
+VRNDSCALESHZrrik_Int 6331
+VRNDSCALESHZrrikz_Int 6332
+VRNDSCALESSZrmi 6333
+VRNDSCALESSZrmi_Int 6334
+VRNDSCALESSZrmik_Int 6335
+VRNDSCALESSZrmikz_Int 6336
+VRNDSCALESSZrri 6337
+VRNDSCALESSZrri_Int 6338
+VRNDSCALESSZrrib_Int 6339
+VRNDSCALESSZrribk_Int 6340
+VRNDSCALESSZrribkz_Int 6341
+VRNDSCALESSZrrik_Int 6342
+VRNDSCALESSZrrikz_Int 6343
+VROUNDPDYmi 6344
+VROUNDPDYri 6345
+VROUNDPDmi 6346
+VROUNDPDri 6347
+VROUNDPSYmi 6348
+VROUNDPSYri 6349
+VROUNDPSmi 6350
+VROUNDPSri 6351
+VROUNDSDmi 6352
+VROUNDSDmi_Int 6353
+VROUNDSDri 6354
+VROUNDSDri_Int 6355
+VROUNDSSmi 6356
+VROUNDSSmi_Int 6357
+VROUNDSSri 6358
+VROUNDSSri_Int 6359
+VRSQRT 6360
+VRSQRTBF 6361
+VRSQRTPHZ 6362
+VRSQRTPHZm 6363
+VRSQRTPHZmb 6364
+VRSQRTPHZmbk 6365
+VRSQRTPHZmbkz 6366
+VRSQRTPHZmk 6367
+VRSQRTPHZmkz 6368
+VRSQRTPHZr 6369
+VRSQRTPHZrk 6370
+VRSQRTPHZrkz 6371
+VRSQRTPSYm 6372
+VRSQRTPSYr 6373
+VRSQRTPSm 6374
+VRSQRTPSr 6375
+VRSQRTSHZrm 6376
+VRSQRTSHZrmk 6377
+VRSQRTSHZrmkz 6378
+VRSQRTSHZrr 6379
+VRSQRTSHZrrk 6380
+VRSQRTSHZrrkz 6381
+VRSQRTSSm 6382
+VRSQRTSSm_Int 6383
+VRSQRTSSr 6384
+VRSQRTSSr_Int 6385
+VSCALEFBF 6386
+VSCALEFPDZ 6387
+VSCALEFPDZrm 6388
+VSCALEFPDZrmb 6389
+VSCALEFPDZrmbk 6390
+VSCALEFPDZrmbkz 6391
+VSCALEFPDZrmk 6392
+VSCALEFPDZrmkz 6393
+VSCALEFPDZrr 6394
+VSCALEFPDZrrb 6395
+VSCALEFPDZrrbk 6396
+VSCALEFPDZrrbkz 6397
+VSCALEFPDZrrk 6398
+VSCALEFPDZrrkz 6399
+VSCALEFPHZ 6400
+VSCALEFPHZrm 6401
+VSCALEFPHZrmb 6402
+VSCALEFPHZrmbk 6403
+VSCALEFPHZrmbkz 6404
+VSCALEFPHZrmk 6405
+VSCALEFPHZrmkz 6406
+VSCALEFPHZrr 6407
+VSCALEFPHZrrb 6408
+VSCALEFPHZrrbk 6409
+VSCALEFPHZrrbkz 6410
+VSCALEFPHZrrk 6411
+VSCALEFPHZrrkz 6412
+VSCALEFPSZ 6413
+VSCALEFPSZrm 6414
+VSCALEFPSZrmb 6415
+VSCALEFPSZrmbk 6416
+VSCALEFPSZrmbkz 6417
+VSCALEFPSZrmk 6418
+VSCALEFPSZrmkz 6419
+VSCALEFPSZrr 6420
+VSCALEFPSZrrb 6421
+VSCALEFPSZrrbk 6422
+VSCALEFPSZrrbkz 6423
+VSCALEFPSZrrk 6424
+VSCALEFPSZrrkz 6425
+VSCALEFSDZrm 6426
+VSCALEFSDZrmk 6427
+VSCALEFSDZrmkz 6428
+VSCALEFSDZrr 6429
+VSCALEFSDZrrb_Int 6430
+VSCALEFSDZrrbk_Int 6431
+VSCALEFSDZrrbkz_Int 6432
+VSCALEFSDZrrk 6433
+VSCALEFSDZrrkz 6434
+VSCALEFSHZrm 6435
+VSCALEFSHZrmk 6436
+VSCALEFSHZrmkz 6437
+VSCALEFSHZrr 6438
+VSCALEFSHZrrb_Int 6439
+VSCALEFSHZrrbk_Int 6440
+VSCALEFSHZrrbkz_Int 6441
+VSCALEFSHZrrk 6442
+VSCALEFSHZrrkz 6443
+VSCALEFSSZrm 6444
+VSCALEFSSZrmk 6445
+VSCALEFSSZrmkz 6446
+VSCALEFSSZrr 6447
+VSCALEFSSZrrb_Int 6448
+VSCALEFSSZrrbk_Int 6449
+VSCALEFSSZrrbkz_Int 6450
+VSCALEFSSZrrk 6451
+VSCALEFSSZrrkz 6452
+VSCATTERDPDZ 6453
+VSCATTERDPDZmr 6454
+VSCATTERDPSZ 6455
+VSCATTERDPSZmr 6456
+VSCATTERPF 6457
+VSCATTERQPDZ 6458
+VSCATTERQPDZmr 6459
+VSCATTERQPSZ 6460
+VSCATTERQPSZmr 6461
+VSHA 6462
+VSHUFF 6463
+VSHUFI 6464
+VSHUFPDYrmi 6465
+VSHUFPDYrri 6466
+VSHUFPDZ 6467
+VSHUFPDZrmbi 6468
+VSHUFPDZrmbik 6469
+VSHUFPDZrmbikz 6470
+VSHUFPDZrmi 6471
+VSHUFPDZrmik 6472
+VSHUFPDZrmikz 6473
+VSHUFPDZrri 6474
+VSHUFPDZrrik 6475
+VSHUFPDZrrikz 6476
+VSHUFPDrmi 6477
+VSHUFPDrri 6478
+VSHUFPSYrmi 6479
+VSHUFPSYrri 6480
+VSHUFPSZ 6481
+VSHUFPSZrmbi 6482
+VSHUFPSZrmbik 6483
+VSHUFPSZrmbikz 6484
+VSHUFPSZrmi 6485
+VSHUFPSZrmik 6486
+VSHUFPSZrmikz 6487
+VSHUFPSZrri 6488
+VSHUFPSZrrik 6489
+VSHUFPSZrrikz 6490
+VSHUFPSrmi 6491
+VSHUFPSrri 6492
+VSM 6493
+VSQRTBF 6494
+VSQRTPDYm 6495
+VSQRTPDYr 6496
+VSQRTPDZ 6497
+VSQRTPDZm 6498
+VSQRTPDZmb 6499
+VSQRTPDZmbk 6500
+VSQRTPDZmbkz 6501
+VSQRTPDZmk 6502
+VSQRTPDZmkz 6503
+VSQRTPDZr 6504
+VSQRTPDZrb 6505
+VSQRTPDZrbk 6506
+VSQRTPDZrbkz 6507
+VSQRTPDZrk 6508
+VSQRTPDZrkz 6509
+VSQRTPDm 6510
+VSQRTPDr 6511
+VSQRTPHZ 6512
+VSQRTPHZm 6513
+VSQRTPHZmb 6514
+VSQRTPHZmbk 6515
+VSQRTPHZmbkz 6516
+VSQRTPHZmk 6517
+VSQRTPHZmkz 6518
+VSQRTPHZr 6519
+VSQRTPHZrb 6520
+VSQRTPHZrbk 6521
+VSQRTPHZrbkz 6522
+VSQRTPHZrk 6523
+VSQRTPHZrkz 6524
+VSQRTPSYm 6525
+VSQRTPSYr 6526
+VSQRTPSZ 6527
+VSQRTPSZm 6528
+VSQRTPSZmb 6529
+VSQRTPSZmbk 6530
+VSQRTPSZmbkz 6531
+VSQRTPSZmk 6532
+VSQRTPSZmkz 6533
+VSQRTPSZr 6534
+VSQRTPSZrb 6535
+VSQRTPSZrbk 6536
+VSQRTPSZrbkz 6537
+VSQRTPSZrk 6538
+VSQRTPSZrkz 6539
+VSQRTPSm 6540
+VSQRTPSr 6541
+VSQRTSDZm 6542
+VSQRTSDZm_Int 6543
+VSQRTSDZmk_Int 6544
+VSQRTSDZmkz_Int 6545
+VSQRTSDZr 6546
+VSQRTSDZr_Int 6547
+VSQRTSDZrb_Int 6548
+VSQRTSDZrbk_Int 6549
+VSQRTSDZrbkz_Int 6550
+VSQRTSDZrk_Int 6551
+VSQRTSDZrkz_Int 6552
+VSQRTSDm 6553
+VSQRTSDm_Int 6554
+VSQRTSDr 6555
+VSQRTSDr_Int 6556
+VSQRTSHZm 6557
+VSQRTSHZm_Int 6558
+VSQRTSHZmk_Int 6559
+VSQRTSHZmkz_Int 6560
+VSQRTSHZr 6561
+VSQRTSHZr_Int 6562
+VSQRTSHZrb_Int 6563
+VSQRTSHZrbk_Int 6564
+VSQRTSHZrbkz_Int 6565
+VSQRTSHZrk_Int 6566
+VSQRTSHZrkz_Int 6567
+VSQRTSSZm 6568
+VSQRTSSZm_Int 6569
+VSQRTSSZmk_Int 6570
+VSQRTSSZmkz_Int 6571
+VSQRTSSZr 6572
+VSQRTSSZr_Int 6573
+VSQRTSSZrb_Int 6574
+VSQRTSSZrbk_Int 6575
+VSQRTSSZrbkz_Int 6576
+VSQRTSSZrk_Int 6577
+VSQRTSSZrkz_Int 6578
+VSQRTSSm 6579
+VSQRTSSm_Int 6580
+VSQRTSSr 6581
+VSQRTSSr_Int 6582
+VSTMXCSR 6583
+VSUBBF 6584
+VSUBPDYrm 6585
+VSUBPDYrr 6586
+VSUBPDZ 6587
+VSUBPDZrm 6588
+VSUBPDZrmb 6589
+VSUBPDZrmbk 6590
+VSUBPDZrmbkz 6591
+VSUBPDZrmk 6592
+VSUBPDZrmkz 6593
+VSUBPDZrr 6594
+VSUBPDZrrb 6595
+VSUBPDZrrbk 6596
+VSUBPDZrrbkz 6597
+VSUBPDZrrk 6598
+VSUBPDZrrkz 6599
+VSUBPDrm 6600
+VSUBPDrr 6601
+VSUBPHZ 6602
+VSUBPHZrm 6603
+VSUBPHZrmb 6604
+VSUBPHZrmbk 6605
+VSUBPHZrmbkz 6606
+VSUBPHZrmk 6607
+VSUBPHZrmkz 6608
+VSUBPHZrr 6609
+VSUBPHZrrb 6610
+VSUBPHZrrbk 6611
+VSUBPHZrrbkz 6612
+VSUBPHZrrk 6613
+VSUBPHZrrkz 6614
+VSUBPSYrm 6615
+VSUBPSYrr 6616
+VSUBPSZ 6617
+VSUBPSZrm 6618
+VSUBPSZrmb 6619
+VSUBPSZrmbk 6620
+VSUBPSZrmbkz 6621
+VSUBPSZrmk 6622
+VSUBPSZrmkz 6623
+VSUBPSZrr 6624
+VSUBPSZrrb 6625
+VSUBPSZrrbk 6626
+VSUBPSZrrbkz 6627
+VSUBPSZrrk 6628
+VSUBPSZrrkz 6629
+VSUBPSrm 6630
+VSUBPSrr 6631
+VSUBSDZrm 6632
+VSUBSDZrm_Int 6633
+VSUBSDZrmk_Int 6634
+VSUBSDZrmkz_Int 6635
+VSUBSDZrr 6636
+VSUBSDZrr_Int 6637
+VSUBSDZrrb_Int 6638
+VSUBSDZrrbk_Int 6639
+VSUBSDZrrbkz_Int 6640
+VSUBSDZrrk_Int 6641
+VSUBSDZrrkz_Int 6642
+VSUBSDrm 6643
+VSUBSDrm_Int 6644
+VSUBSDrr 6645
+VSUBSDrr_Int 6646
+VSUBSHZrm 6647
+VSUBSHZrm_Int 6648
+VSUBSHZrmk_Int 6649
+VSUBSHZrmkz_Int 6650
+VSUBSHZrr 6651
+VSUBSHZrr_Int 6652
+VSUBSHZrrb_Int 6653
+VSUBSHZrrbk_Int 6654
+VSUBSHZrrbkz_Int 6655
+VSUBSHZrrk_Int 6656
+VSUBSHZrrkz_Int 6657
+VSUBSSZrm 6658
+VSUBSSZrm_Int 6659
+VSUBSSZrmk_Int 6660
+VSUBSSZrmkz_Int 6661
+VSUBSSZrr 6662
+VSUBSSZrr_Int 6663
+VSUBSSZrrb_Int 6664
+VSUBSSZrrbk_Int 6665
+VSUBSSZrrbkz_Int 6666
+VSUBSSZrrk_Int 6667
+VSUBSSZrrkz_Int 6668
+VSUBSSrm 6669
+VSUBSSrm_Int 6670
+VSUBSSrr 6671
+VSUBSSrr_Int 6672
+VTESTPDYrm 6673
+VTESTPDYrr 6674
+VTESTPDrm 6675
+VTESTPDrr 6676
+VTESTPSYrm 6677
+VTESTPSYrr 6678
+VTESTPSrm 6679
+VTESTPSrr 6680
+VUCOMISDZrm 6681
+VUCOMISDZrm_Int 6682
+VUCOMISDZrr 6683
+VUCOMISDZrr_Int 6684
+VUCOMISDZrrb 6685
+VUCOMISDrm 6686
+VUCOMISDrm_Int 6687
+VUCOMISDrr 6688
+VUCOMISDrr_Int 6689
+VUCOMISHZrm 6690
+VUCOMISHZrm_Int 6691
+VUCOMISHZrr 6692
+VUCOMISHZrr_Int 6693
+VUCOMISHZrrb 6694
+VUCOMISSZrm 6695
+VUCOMISSZrm_Int 6696
+VUCOMISSZrr 6697
+VUCOMISSZrr_Int 6698
+VUCOMISSZrrb 6699
+VUCOMISSrm 6700
+VUCOMISSrm_Int 6701
+VUCOMISSrr 6702
+VUCOMISSrr_Int 6703
+VUCOMXSDZrm 6704
+VUCOMXSDZrm_Int 6705
+VUCOMXSDZrr 6706
+VUCOMXSDZrr_Int 6707
+VUCOMXSDZrrb_Int 6708
+VUCOMXSHZrm 6709
+VUCOMXSHZrm_Int 6710
+VUCOMXSHZrr 6711
+VUCOMXSHZrr_Int 6712
+VUCOMXSHZrrb_Int 6713
+VUCOMXSSZrm 6714
+VUCOMXSSZrm_Int 6715
+VUCOMXSSZrr 6716
+VUCOMXSSZrr_Int 6717
+VUCOMXSSZrrb_Int 6718
+VUNPCKHPDYrm 6719
+VUNPCKHPDYrr 6720
+VUNPCKHPDZ 6721
+VUNPCKHPDZrm 6722
+VUNPCKHPDZrmb 6723
+VUNPCKHPDZrmbk 6724
+VUNPCKHPDZrmbkz 6725
+VUNPCKHPDZrmk 6726
+VUNPCKHPDZrmkz 6727
+VUNPCKHPDZrr 6728
+VUNPCKHPDZrrk 6729
+VUNPCKHPDZrrkz 6730
+VUNPCKHPDrm 6731
+VUNPCKHPDrr 6732
+VUNPCKHPSYrm 6733
+VUNPCKHPSYrr 6734
+VUNPCKHPSZ 6735
+VUNPCKHPSZrm 6736
+VUNPCKHPSZrmb 6737
+VUNPCKHPSZrmbk 6738
+VUNPCKHPSZrmbkz 6739
+VUNPCKHPSZrmk 6740
+VUNPCKHPSZrmkz 6741
+VUNPCKHPSZrr 6742
+VUNPCKHPSZrrk 6743
+VUNPCKHPSZrrkz 6744
+VUNPCKHPSrm 6745
+VUNPCKHPSrr 6746
+VUNPCKLPDYrm 6747
+VUNPCKLPDYrr 6748
+VUNPCKLPDZ 6749
+VUNPCKLPDZrm 6750
+VUNPCKLPDZrmb 6751
+VUNPCKLPDZrmbk 6752
+VUNPCKLPDZrmbkz 6753
+VUNPCKLPDZrmk 6754
+VUNPCKLPDZrmkz 6755
+VUNPCKLPDZrr 6756
+VUNPCKLPDZrrk 6757
+VUNPCKLPDZrrkz 6758
+VUNPCKLPDrm 6759
+VUNPCKLPDrr 6760
+VUNPCKLPSYrm 6761
+VUNPCKLPSYrr 6762
+VUNPCKLPSZ 6763
+VUNPCKLPSZrm 6764
+VUNPCKLPSZrmb 6765
+VUNPCKLPSZrmbk 6766
+VUNPCKLPSZrmbkz 6767
+VUNPCKLPSZrmk 6768
+VUNPCKLPSZrmkz 6769
+VUNPCKLPSZrr 6770
+VUNPCKLPSZrrk 6771
+VUNPCKLPSZrrkz 6772
+VUNPCKLPSrm 6773
+VUNPCKLPSrr 6774
+VXORPDYrm 6775
+VXORPDYrr 6776
+VXORPDZ 6777
+VXORPDZrm 6778
+VXORPDZrmb 6779
+VXORPDZrmbk 6780
+VXORPDZrmbkz 6781
+VXORPDZrmk 6782
+VXORPDZrmkz 6783
+VXORPDZrr 6784
+VXORPDZrrk 6785
+VXORPDZrrkz 6786
+VXORPDrm 6787
+VXORPDrr 6788
+VXORPSYrm 6789
+VXORPSYrr 6790
+VXORPSZ 6791
+VXORPSZrm 6792
+VXORPSZrmb 6793
+VXORPSZrmbk 6794
+VXORPSZrmbkz 6795
+VXORPSZrmk 6796
+VXORPSZrmkz 6797
+VXORPSZrr 6798
+VXORPSZrrk 6799
+VXORPSZrrkz 6800
+VXORPSrm 6801
+VXORPSrr 6802
+VZEROALL 6803
+VZEROUPPER 6804
+V_SET 6805
+V_SETALLONES 6806
+WAIT 6807
+WBINVD 6808
+WBNOINVD 6809
+WRFLAGS 6810
+WRFSBASE 6811
+WRGSBASE 6812
+WRMSR 6813
+WRMSRLIST 6814
+WRMSRNS 6815
+WRMSRNSir 6816
+WRMSRNSir_EVEX 6817
+WRPKRUr 6818
+WRSSD 6819
+WRSSD_EVEX 6820
+WRSSQ 6821
+WRSSQ_EVEX 6822
+WRUSSD 6823
+WRUSSD_EVEX 6824
+WRUSSQ 6825
+WRUSSQ_EVEX 6826
+XABORT 6827
+XABORT_DEF 6828
+XACQUIRE_PREFIX 6829
+XADD 6830
+XAM_F 6831
+XAM_Fp 6832
+XBEGIN 6833
+XCHG 6834
+XCH_F 6835
+XCRYPTCBC 6836
+XCRYPTCFB 6837
+XCRYPTCTR 6838
+XCRYPTECB 6839
+XCRYPTOFB 6840
+XEND 6841
+XGETBV 6842
+XLAT 6843
+XOR 6844
+XORPDrm 6845
+XORPDrr 6846
+XORPSrm 6847
+XORPSrr 6848
+XRELEASE_PREFIX 6849
+XRESLDTRK 6850
+XRSTOR 6851
+XRSTORS 6852
+XSAVE 6853
+XSAVEC 6854
+XSAVEOPT 6855
+XSAVES 6856
+XSETBV 6857
+XSHA 6858
+XSTORE 6859
+XSUSLDTRK 6860
+XTEST 6861
+Immediate 6862
+CImmediate 6863
+FPImmediate 6864
+MBB 6865
+FrameIndex 6866
+ConstantPoolIndex 6867
+TargetIndex 6868
+JumpTableIndex 6869
+ExternalSymbol 6870
+GlobalAddress 6871
+BlockAddress 6872
+RegisterMask 6873
+RegisterLiveOut 6874
+Metadata 6875
+MCSymbol 6876
+CFIIndex 6877
+IntrinsicID 6878
+Predicate 6879
+ShuffleMask 6880
+PhyReg_GR8 6881
+PhyReg_GRH8 6882
+PhyReg_GR8_NOREX2 6883
+PhyReg_GR8_NOREX 6884
+PhyReg_GR8_ABCD_H 6885
+PhyReg_GR8_ABCD_L 6886
+PhyReg_GRH16 6887
+PhyReg_GR16 6888
+PhyReg_GR16_NOREX2 6889
+PhyReg_GR16_NOREX 6890
+PhyReg_VK1 6891
+PhyReg_VK16 6892
+PhyReg_VK2 6893
+PhyReg_VK4 6894
+PhyReg_VK8 6895
+PhyReg_VK16WM 6896
+PhyReg_VK1WM 6897
+PhyReg_VK2WM 6898
+PhyReg_VK4WM 6899
+PhyReg_VK8WM 6900
+PhyReg_SEGMENT_REG 6901
+PhyReg_GR16_ABCD 6902
+PhyReg_FPCCR 6903
+PhyReg_FR16X 6904
+PhyReg_FR16 6905
+PhyReg_VK16PAIR 6906
+PhyReg_VK1PAIR 6907
+PhyReg_VK2PAIR 6908
+PhyReg_VK4PAIR 6909
+PhyReg_VK8PAIR 6910
+PhyReg_VK1PAIR_with_sub_mask_0_in_VK1WM 6911
+PhyReg_LOW32_ADDR_ACCESS_RBP 6912
+PhyReg_LOW32_ADDR_ACCESS 6913
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 6914
+PhyReg_FR32X 6915
+PhyReg_GR32 6916
+PhyReg_GR32_NOSP 6917
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 6918
+PhyReg_DEBUG_REG 6919
+PhyReg_FR32 6920
+PhyReg_GR32_NOREX2 6921
+PhyReg_GR32_NOREX2_NOSP 6922
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 6923
+PhyReg_GR32_NOREX 6924
+PhyReg_VK32 6925
+PhyReg_GR32_NOREX_NOSP 6926
+PhyReg_RFP32 6927
+PhyReg_VK32WM 6928
+PhyReg_GR32_ABCD 6929
+PhyReg_GR32_TC 6930
+PhyReg_GR32_ABCD_and_GR32_TC 6931
+PhyReg_GR32_AD 6932
+PhyReg_GR32_ArgRef 6933
+PhyReg_GR32_BPSP 6934
+PhyReg_GR32_BSI 6935
+PhyReg_GR32_CB 6936
+PhyReg_GR32_DC 6937
+PhyReg_GR32_DIBP 6938
+PhyReg_GR32_SIDI 6939
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 6940
+PhyReg_CCR 6941
+PhyReg_DFCCR 6942
+PhyReg_GR32_ABCD_and_GR32_BSI 6943
+PhyReg_GR32_AD_and_GR32_ArgRef 6944
+PhyReg_GR32_ArgRef_and_GR32_CB 6945
+PhyReg_GR32_BPSP_and_GR32_DIBP 6946
+PhyReg_GR32_BPSP_and_GR32_TC 6947
+PhyReg_GR32_BSI_and_GR32_SIDI 6948
+PhyReg_GR32_DIBP_and_GR32_SIDI 6949
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 6950
+PhyReg_LOW32_ADDR_ACCESS_with_sub_32bit 6951
+PhyReg_RFP64 6952
+PhyReg_GR64 6953
+PhyReg_FR64X 6954
+PhyReg_GR64_with_sub_8bit 6955
+PhyReg_GR64_NOSP 6956
+PhyReg_GR64_NOREX2 6957
+PhyReg_CONTROL_REG 6958
+PhyReg_FR64 6959
+PhyReg_GR64_with_sub_16bit_in_GR16_NOREX2 6960
+PhyReg_GR64_NOREX2_NOSP 6961
+PhyReg_GR64PLTSafe 6962
+PhyReg_GR64_TC 6963
+PhyReg_GR64_NOREX 6964
+PhyReg_GR64_TCW64 6965
+PhyReg_GR64_TC_with_sub_8bit 6966
+PhyReg_GR64_NOREX2_NOSP_and_GR64_TC 6967
+PhyReg_GR64_TCW64_with_sub_8bit 6968
+PhyReg_GR64_TC_and_GR64_TCW64 6969
+PhyReg_GR64_with_sub_16bit_in_GR16_NOREX 6970
+PhyReg_VK64 6971
+PhyReg_VR64 6972
+PhyReg_GR64PLTSafe_and_GR64_TC 6973
+PhyReg_GR64_NOREX2_NOSP_and_GR64_TCW64 6974
+PhyReg_GR64_NOREX_NOSP 6975
+PhyReg_GR64_NOREX_and_GR64_TC 6976
+PhyReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 6977
+PhyReg_VK64WM 6978
+PhyReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 6979
+PhyReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 6980
+PhyReg_GR64PLTSafe_and_GR64_TCW64 6981
+PhyReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 6982
+PhyReg_GR64_NOREX_and_GR64_TCW64 6983
+PhyReg_GR64_ABCD 6984
+PhyReg_GR64_with_sub_32bit_in_GR32_TC 6985
+PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 6986
+PhyReg_GR64_AD 6987
+PhyReg_GR64_ArgRef 6988
+PhyReg_GR64_and_LOW32_ADDR_ACCESS_RBP 6989
+PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef 6990
+PhyReg_GR64_with_sub_32bit_in_GR32_BPSP 6991
+PhyReg_GR64_with_sub_32bit_in_GR32_BSI 6992
+PhyReg_GR64_with_sub_32bit_in_GR32_CB 6993
+PhyReg_GR64_with_sub_32bit_in_GR32_DIBP 6994
+PhyReg_GR64_with_sub_32bit_in_GR32_SIDI 6995
+PhyReg_GR64_A 6996
+PhyReg_GR64_ArgRef_and_GR64_TC 6997
+PhyReg_GR64_and_LOW32_ADDR_ACCESS 6998
+PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 6999
+PhyReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7000
+PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7001
+PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7002
+PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7003
+PhyReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7004
+PhyReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7005
+PhyReg_RST 7006
+PhyReg_RFP80 7007
+PhyReg_RFP80_7 7008
+PhyReg_VR128X 7009
+PhyReg_VR128 7010
+PhyReg_VR256X 7011
+PhyReg_VR256 7012
+PhyReg_VR512 7013
+PhyReg_VR512_0_15 7014
+PhyReg_TILE 7015
+VirtReg_GR8 7016
+VirtReg_GRH8 7017
+VirtReg_GR8_NOREX2 7018
+VirtReg_GR8_NOREX 7019
+VirtReg_GR8_ABCD_H 7020
+VirtReg_GR8_ABCD_L 7021
+VirtReg_GRH16 7022
+VirtReg_GR16 7023
+VirtReg_GR16_NOREX2 7024
+VirtReg_GR16_NOREX 7025
+VirtReg_VK1 7026
+VirtReg_VK16 7027
+VirtReg_VK2 7028
+VirtReg_VK4 7029
+VirtReg_VK8 7030
+VirtReg_VK16WM 7031
+VirtReg_VK1WM 7032
+VirtReg_VK2WM 7033
+VirtReg_VK4WM 7034
+VirtReg_VK8WM 7035
+VirtReg_SEGMENT_REG 7036
+VirtReg_GR16_ABCD 7037
+VirtReg_FPCCR 7038
+VirtReg_FR16X 7039
+VirtReg_FR16 7040
+VirtReg_VK16PAIR 7041
+VirtReg_VK1PAIR 7042
+VirtReg_VK2PAIR 7043
+VirtReg_VK4PAIR 7044
+VirtReg_VK8PAIR 7045
+VirtReg_VK1PAIR_with_sub_mask_0_in_VK1WM 7046
+VirtReg_LOW32_ADDR_ACCESS_RBP 7047
+VirtReg_LOW32_ADDR_ACCESS 7048
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 7049
+VirtReg_FR32X 7050
+VirtReg_GR32 7051
+VirtReg_GR32_NOSP 7052
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 7053
+VirtReg_DEBUG_REG 7054
+VirtReg_FR32 7055
+VirtReg_GR32_NOREX2 7056
+VirtReg_GR32_NOREX2_NOSP 7057
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 7058
+VirtReg_GR32_NOREX 7059
+VirtReg_VK32 7060
+VirtReg_GR32_NOREX_NOSP 7061
+VirtReg_RFP32 7062
+VirtReg_VK32WM 7063
+VirtReg_GR32_ABCD 7064
+VirtReg_GR32_TC 7065
+VirtReg_GR32_ABCD_and_GR32_TC 7066
+VirtReg_GR32_AD 7067
+VirtReg_GR32_ArgRef 7068
+VirtReg_GR32_BPSP 7069
+VirtReg_GR32_BSI 7070
+VirtReg_GR32_CB 7071
+VirtReg_GR32_DC 7072
+VirtReg_GR32_DIBP 7073
+VirtReg_GR32_SIDI 7074
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 7075
+VirtReg_CCR 7076
+VirtReg_DFCCR 7077
+VirtReg_GR32_ABCD_and_GR32_BSI 7078
+VirtReg_GR32_AD_and_GR32_ArgRef 7079
+VirtReg_GR32_ArgRef_and_GR32_CB 7080
+VirtReg_GR32_BPSP_and_GR32_DIBP 7081
+VirtReg_GR32_BPSP_and_GR32_TC 7082
+VirtReg_GR32_BSI_and_GR32_SIDI 7083
+VirtReg_GR32_DIBP_and_GR32_SIDI 7084
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 7085
+VirtReg_LOW32_ADDR_ACCESS_with_sub_32bit 7086
+VirtReg_RFP64 7087
+VirtReg_GR64 7088
+VirtReg_FR64X 7089
+VirtReg_GR64_with_sub_8bit 7090
+VirtReg_GR64_NOSP 7091
+VirtReg_GR64_NOREX2 7092
+VirtReg_CONTROL_REG 7093
+VirtReg_FR64 7094
+VirtReg_GR64_with_sub_16bit_in_GR16_NOREX2 7095
+VirtReg_GR64_NOREX2_NOSP 7096
+VirtReg_GR64PLTSafe 7097
+VirtReg_GR64_TC 7098
+VirtReg_GR64_NOREX 7099
+VirtReg_GR64_TCW64 7100
+VirtReg_GR64_TC_with_sub_8bit 7101
+VirtReg_GR64_NOREX2_NOSP_and_GR64_TC 7102
+VirtReg_GR64_TCW64_with_sub_8bit 7103
+VirtReg_GR64_TC_and_GR64_TCW64 7104
+VirtReg_GR64_with_sub_16bit_in_GR16_NOREX 7105
+VirtReg_VK64 7106
+VirtReg_VR64 7107
+VirtReg_GR64PLTSafe_and_GR64_TC 7108
+VirtReg_GR64_NOREX2_NOSP_and_GR64_TCW64 7109
+VirtReg_GR64_NOREX_NOSP 7110
+VirtReg_GR64_NOREX_and_GR64_TC 7111
+VirtReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 7112
+VirtReg_VK64WM 7113
+VirtReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 7114
+VirtReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 7115
+VirtReg_GR64PLTSafe_and_GR64_TCW64 7116
+VirtReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 7117
+VirtReg_GR64_NOREX_and_GR64_TCW64 7118
+VirtReg_GR64_ABCD 7119
+VirtReg_GR64_with_sub_32bit_in_GR32_TC 7120
+VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 7121
+VirtReg_GR64_AD 7122
+VirtReg_GR64_ArgRef 7123
+VirtReg_GR64_and_LOW32_ADDR_ACCESS_RBP 7124
+VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef 7125
+VirtReg_GR64_with_sub_32bit_in_GR32_BPSP 7126
+VirtReg_GR64_with_sub_32bit_in_GR32_BSI 7127
+VirtReg_GR64_with_sub_32bit_in_GR32_CB 7128
+VirtReg_GR64_with_sub_32bit_in_GR32_DIBP 7129
+VirtReg_GR64_with_sub_32bit_in_GR32_SIDI 7130
+VirtReg_GR64_A 7131
+VirtReg_GR64_ArgRef_and_GR64_TC 7132
+VirtReg_GR64_and_LOW32_ADDR_ACCESS 7133
+VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7134
+VirtReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7135
+VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7136
+VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7137
+VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7138
+VirtReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7139
+VirtReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7140
+VirtReg_RST 7141
+VirtReg_RFP80 7142
+VirtReg_RFP80_7 7143
+VirtReg_VR128X 7144
+VirtReg_VR128 7145
+VirtReg_VR256X 7146
+VirtReg_VR256 7147
+VirtReg_VR512 7148
+VirtReg_VR512_0_15 7149
+VirtReg_TILE 7150
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test
new file mode 100644
index 0000000..24726c34
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test
@@ -0,0 +1,37 @@
+## This test checks that we output a warning when the specified version is too old to support the given features.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-readobj --bb-addr-map %t 2>&1 | FileCheck -DFILE=%t %s
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+
+# CHECK: BBAddrMap [
+# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 1: version should be >= 3 for SHT_LLVM_BB_ADDR_MAP when callsite offsets feature is enabled: version = 2 feature = 32
+Sections:
+ - Name: '.llvm_bb_addr_map (1)'
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Entries:
+ - Version: 2
+ Feature: 0x20
+
+# CHECK: BBAddrMap [
+# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 2: version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when basic block hash feature is enabled: version = 3 feature = 64
+
+ - Name: '.llvm_bb_addr_map (2)'
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Entries:
+ - Version: 3
+ Feature: 0x40
+
+# CHECK: BBAddrMap [
+# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 3: version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when post link cfg feature is enabled: version = 4 feature = 128
+
+ - Name: '.llvm_bb_addr_map (3)'
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Entries:
+ - Version: 4
+ Feature: 0x80
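The three cases in the new test above pin down a version floor for each feature bit (0x20 needs version >= 3, 0x40 needs >= 4, 0x80 needs >= 5). Below is a minimal sketch of that constraint, written in Python purely for illustration; the names and the function are assumptions taken from the expected warnings, not the actual llvm-readobj implementation.

```python
# Illustrative model of the version/feature rule exercised by the test above.
# The mapping is read off the expected warnings; it is NOT llvm-readobj code.
MIN_VERSION_FOR_FEATURE = {
    0x20: 3,  # callsite offsets
    0x40: 4,  # basic block hash
    0x80: 5,  # post link cfg
}

def bb_addr_map_warnings(version: int, feature: int) -> list[str]:
    """Return the warnings an SHT_LLVM_BB_ADDR_MAP entry would trigger."""
    warnings = []
    for bit, min_version in MIN_VERSION_FOR_FEATURE.items():
        if feature & bit and version < min_version:
            warnings.append(
                f"version should be >= {min_version} for SHT_LLVM_BB_ADDR_MAP: "
                f"version = {version} feature = {feature}"
            )
    return warnings

# The first entry in the test (Version: 2, Feature: 0x20) trips the rule;
# bumping the version to 3 silences it.
assert bb_addr_map_warnings(2, 0x20)
assert not bb_addr_map_warnings(3, 0x20)
```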
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
index 5faafd4..8e9d227 100644
--- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
@@ -15,7 +15,7 @@
## Check that a malformed section can be handled.
# RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o
-# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED
+# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000015 -DFILE=%t2.o --check-prefix=TRUNCATED
## Check that missing features can be handled.
# RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o
@@ -59,17 +59,20 @@
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 100
# PRETTY-NEXT: Frequency: 1.0
+# CHECK-NEXT: PostLink Frequency: 10
# CHECK-NEXT: Successors [
# CHECK-NEXT: {
# CHECK-NEXT: ID: 2
# RAW-NEXT: Probability: 0x80000000
# PRETTY-NEXT: Probability: 0x80000000 / 0x80000000 = 100.00%
+# CHECK-NEXT: PostLink Probability: 7
# CHECK-NEXT: }
# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 100
# PRETTY-NEXT: Frequency: 1.0
+# CHECK-NEXT: PostLink Frequency: 0
# CHECK-NEXT: Successors [
# CHECK-NEXT: ]
# CHECK-NEXT: }
@@ -172,8 +175,8 @@ Sections:
ShSize: [[SIZE=<none>]]
Link: .text
Entries:
- - Version: 2
- Feature: 0x7
+ - Version: 5
+ Feature: 0x87
BBRanges:
- BaseAddress: [[ADDR=0x11111]]
BBEntries:
@@ -197,10 +200,12 @@ Sections:
PGOAnalyses:
- FuncEntryCount: 100
PGOBBEntries:
- - BBFreq: 100
+ - BBFreq: 100
+ PostLinkBBFreq: 10
Successors:
- - ID: 2
- BrProb: 0x80000000
+ - ID: 2
+ BrProb: 0x80000000
+ PostLinkBrFreq: 7
- BBFreq: 100
Successors: []
- FuncEntryCount: 8888
diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
index 299bf46..645507a 100644
--- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
+++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
@@ -15,7 +15,7 @@
# VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP
# VALID-NEXT: Entries:
# VALID-NEXT: - Version: 2
-# VALID-NEXT: Feature: 0x7
+# VALID-NEXT: Feature: 0x87
## The 'BaseAddress' field is omitted when it's zero.
# VALID-NEXT: BBRanges:
# VALID-NEXT: - BBEntries:
@@ -43,17 +43,23 @@
# VALID-NEXT: PGOAnalyses:
# VALID-NEXT: - FuncEntryCount: 100
# VALID-NEXT: PGOBBEntries:
-# VALID-NEXT: - BBFreq: 100
+# VALID-NEXT: - BBFreq: 100
+# VALID-NEXT: PostLinkBBFreq: 10
# VALID-NEXT: Successors:
-# VALID-NEXT: - ID: 2
-# VALID-NEXT: BrProb: 0x80000000
-# VALID-NEXT: - ID: 4
-# VALID-NEXT: BrProb: 0x80000000
-# VALID-NEXT: - BBFreq: 50
+# VALID-NEXT: - ID: 2
+# VALID-NEXT: BrProb: 0x80000000
+# VALID-NEXT: PostLinkBrFreq: 7
+# VALID-NEXT: - ID: 4
+# VALID-NEXT: BrProb: 0x80000000
+# VALID-NEXT: PostLinkBrFreq: 0
+# VALID-NEXT: - BBFreq: 50
+# VALID-NEXT: PostLinkBBFreq: 0
# VALID-NEXT: Successors:
-# VALID-NEXT: - ID: 4
-# VALID-NEXT: BrProb: 0xFFFFFFFF
-# VALID-NEXT: - BBFreq: 100
+# VALID-NEXT: - ID: 4
+# VALID-NEXT: BrProb: 0xFFFFFFFF
+# VALID-NEXT: PostLinkBrFreq: 0
+# VALID-NEXT: - BBFreq: 100
+# VALID-NEXT: PostLinkBBFreq: 3
# VALID-NEXT: Successors: []
# VALID-NEXT: PGOBBEntries:
# VALID-NEXT: - BBFreq: 20
@@ -69,7 +75,7 @@ Sections:
ShSize: [[SIZE=<none>]]
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x87
BBRanges:
- BaseAddress: 0x0
BBEntries:
@@ -97,17 +103,20 @@ Sections:
PGOAnalyses:
- FuncEntryCount: 100
PGOBBEntries:
- - BBFreq: 100
+ - BBFreq: 100
+ PostLinkBBFreq: 10
Successors:
- - ID: 2
- BrProb: 0x80000000
- - ID: 4
- BrProb: 0x80000000
- - BBFreq: 50
+ - ID: 2
+ BrProb: 0x80000000
+ PostLinkBrFreq: 7
+ - ID: 4
+ BrProb: 0x80000000
+ - BBFreq: 50
Successors:
- - ID: 4
- BrProb: 0xFFFFFFFF
- - BBFreq: 100
+ - ID: 4
+ BrProb: 0xFFFFFFFF
+ - BBFreq: 100
+ PostLinkBBFreq: 3
Successors: []
- PGOBBEntries:
- BBFreq: 20
diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml
index a4cb572..ac9c8d4 100644
--- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml
@@ -6,8 +6,9 @@
# Case 4: Specify Entries.
# CHECK: Name: .llvm_bb_addr_map (1)
# CHECK: SectionData (
-# CHECK-NEXT: 0000: 02072000 00000000 0000010B 010203E8
-# CHECK-NEXT: 0010: 07E80702 0CEEDDBB F70E0D91 A2C48801
+# CHECK-NEXT: 0000: 02872000 00000000 0000010B 010203E8
+# CHECK-NEXT: 0010: 07E80764 020CEEDD BBF70E28 0D91A2C4
+# CHECK-NEXT: 0020: 880100
# CHECK-NEXT: )
# Case 7: Not including a field which is enabled in feature doesn't emit value
@@ -26,12 +27,12 @@ Sections:
## Test the following cases:
## 1) We can produce an .llvm_bb_addr_map section from a description with
-## Entries and PGO Analysis data.
+## Entries and PGO Analysis and Post Link data.
- Name: '.llvm_bb_addr_map (1)'
Type: SHT_LLVM_BB_ADDR_MAP
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x87
BBRanges:
- BaseAddress: 0x0000000000000020
BBEntries:
@@ -42,12 +43,14 @@ Sections:
PGOAnalyses:
- FuncEntryCount: 1000
PGOBBEntries:
- - BBFreq: 1000
+ - BBFreq: 1000
+ PostLinkBBFreq: 100
Successors:
- - ID: 12
- BrProb: 0xeeeeeeee
- - ID: 13
- BrProb: 0x11111111
+ - ID: 12
+ BrProb: 0xeeeeeeee
+ PostLinkBrFreq: 40
+ - ID: 13
+ BrProb: 0x11111111
## 2) According to feature we have FuncEntryCount but none is provided in yaml
- Name: '.llvm_bb_addr_map (2)'
@@ -66,7 +69,7 @@ Sections:
## Check that yaml2obj generates a warning when we use unsupported feature.
# RUN: yaml2obj --docnum=2 %s 2>&1 | FileCheck %s --check-prefix=INVALID-FEATURE
-# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xf0
+# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0x100
--- !ELF
FileHeader:
@@ -79,4 +82,4 @@ Sections:
Entries:
- Version: 2
## Specify unsupported feature
- Feature: 0xF0
+ Feature: 0x100
diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
index 339e419..05d77d6 100644
--- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
@@ -220,7 +220,7 @@ Sections:
## Check that yaml2obj generates a warning when we use unsupported versions.
# RUN: yaml2obj --docnum=3 %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION
-# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version
+# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 6; encoding using the most recent version
--- !ELF
FileHeader:
@@ -232,4 +232,4 @@ Sections:
Type: SHT_LLVM_BB_ADDR_MAP
Entries:
## Specify unsupported version
- - Version: 5
+ - Version: 6